summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2001-04-28 17:18:26 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2001-04-28 17:18:26 +0000
commit0060aade95d582c42ee3fa5b8432d566fcd6a4a2 (patch)
tree7721dfaffe4f678bff14354d99040e1636d5e49c
parentbe8ea61add6f44abcbd71ee8ad95b2428f2097a8 (diff)
downloadperl-0060aade95d582c42ee3fa5b8432d566fcd6a4a2.tar.gz
Add a level of indirection to the implementation of \p{InFoo}
so that we don't have to have long filenames. (Nothing changes in the user interface.) The indirection is defined in the file lib/unicode/In.pl and it is handled in lib/utf8_heavy.pl. Also rename some of the character classes by removing '-' from the class names, and finally rename Block.pl to Blocks.pl. p4raw-id: //depot/perl@9897
-rw-r--r--MANIFEST186
-rw-r--r--lib/unicode/Blocks.pl203
-rw-r--r--lib/unicode/In.pl101
-rw-r--r--lib/unicode/In/0.pl (renamed from lib/unicode/In/BasicLatin.pl)0
-rw-r--r--lib/unicode/In/1.pl (renamed from lib/unicode/In/Latin-1Supplement.pl)0
-rw-r--r--lib/unicode/In/10.pl (renamed from lib/unicode/In/Hebrew.pl)0
-rw-r--r--lib/unicode/In/11.pl (renamed from lib/unicode/In/Arabic.pl)0
-rw-r--r--lib/unicode/In/12.pl (renamed from lib/unicode/In/Syriac.pl)0
-rw-r--r--lib/unicode/In/13.pl (renamed from lib/unicode/In/Thaana.pl)0
-rw-r--r--lib/unicode/In/14.pl (renamed from lib/unicode/In/Devanagari.pl)0
-rw-r--r--lib/unicode/In/15.pl (renamed from lib/unicode/In/Bengali.pl)0
-rw-r--r--lib/unicode/In/16.pl (renamed from lib/unicode/In/Gurmukhi.pl)0
-rw-r--r--lib/unicode/In/17.pl (renamed from lib/unicode/In/Gujarati.pl)0
-rw-r--r--lib/unicode/In/18.pl (renamed from lib/unicode/In/Oriya.pl)0
-rw-r--r--lib/unicode/In/19.pl (renamed from lib/unicode/In/Tamil.pl)0
-rw-r--r--lib/unicode/In/2.pl (renamed from lib/unicode/In/LatinExtended-A.pl)0
-rw-r--r--lib/unicode/In/20.pl (renamed from lib/unicode/In/Telugu.pl)0
-rw-r--r--lib/unicode/In/21.pl (renamed from lib/unicode/In/Kannada.pl)0
-rw-r--r--lib/unicode/In/22.pl (renamed from lib/unicode/In/Malayalam.pl)0
-rw-r--r--lib/unicode/In/23.pl (renamed from lib/unicode/In/Sinhala.pl)0
-rw-r--r--lib/unicode/In/24.pl (renamed from lib/unicode/In/Thai.pl)0
-rw-r--r--lib/unicode/In/25.pl (renamed from lib/unicode/In/Lao.pl)0
-rw-r--r--lib/unicode/In/26.pl (renamed from lib/unicode/In/Tibetan.pl)0
-rw-r--r--lib/unicode/In/27.pl (renamed from lib/unicode/In/Myanmar.pl)0
-rw-r--r--lib/unicode/In/28.pl (renamed from lib/unicode/In/Georgian.pl)0
-rw-r--r--lib/unicode/In/29.pl (renamed from lib/unicode/In/HangulJamo.pl)0
-rw-r--r--lib/unicode/In/3.pl (renamed from lib/unicode/In/LatinExtended-B.pl)0
-rw-r--r--lib/unicode/In/30.pl (renamed from lib/unicode/In/Ethiopic.pl)0
-rw-r--r--lib/unicode/In/31.pl (renamed from lib/unicode/In/Cherokee.pl)0
-rw-r--r--lib/unicode/In/32.pl (renamed from lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl)0
-rw-r--r--lib/unicode/In/33.pl (renamed from lib/unicode/In/Ogham.pl)0
-rw-r--r--lib/unicode/In/34.pl (renamed from lib/unicode/In/Runic.pl)0
-rw-r--r--lib/unicode/In/35.pl (renamed from lib/unicode/In/Khmer.pl)0
-rw-r--r--lib/unicode/In/36.pl (renamed from lib/unicode/In/Mongolian.pl)0
-rw-r--r--lib/unicode/In/37.pl (renamed from lib/unicode/In/LatinExtendedAdditional.pl)0
-rw-r--r--lib/unicode/In/38.pl (renamed from lib/unicode/In/GreekExtended.pl)0
-rw-r--r--lib/unicode/In/39.pl (renamed from lib/unicode/In/GeneralPunctuation.pl)0
-rw-r--r--lib/unicode/In/4.pl (renamed from lib/unicode/In/IPAExtensions.pl)0
-rw-r--r--lib/unicode/In/40.pl (renamed from lib/unicode/In/SuperscriptsandSubscripts.pl)0
-rw-r--r--lib/unicode/In/41.pl (renamed from lib/unicode/In/CurrencySymbols.pl)0
-rw-r--r--lib/unicode/In/42.pl (renamed from lib/unicode/In/CombiningMarksforSymbols.pl)0
-rw-r--r--lib/unicode/In/43.pl (renamed from lib/unicode/In/LetterlikeSymbols.pl)0
-rw-r--r--lib/unicode/In/44.pl (renamed from lib/unicode/In/NumberForms.pl)0
-rw-r--r--lib/unicode/In/45.pl (renamed from lib/unicode/In/Arrows.pl)0
-rw-r--r--lib/unicode/In/46.pl (renamed from lib/unicode/In/MathematicalOperators.pl)0
-rw-r--r--lib/unicode/In/47.pl (renamed from lib/unicode/In/MiscellaneousTechnical.pl)0
-rw-r--r--lib/unicode/In/48.pl (renamed from lib/unicode/In/ControlPictures.pl)0
-rw-r--r--lib/unicode/In/49.pl (renamed from lib/unicode/In/OpticalCharacterRecognition.pl)0
-rw-r--r--lib/unicode/In/5.pl (renamed from lib/unicode/In/SpacingModifierLetters.pl)0
-rw-r--r--lib/unicode/In/50.pl (renamed from lib/unicode/In/EnclosedAlphanumerics.pl)0
-rw-r--r--lib/unicode/In/51.pl (renamed from lib/unicode/In/BoxDrawing.pl)0
-rw-r--r--lib/unicode/In/52.pl (renamed from lib/unicode/In/BlockElements.pl)0
-rw-r--r--lib/unicode/In/53.pl (renamed from lib/unicode/In/GeometricShapes.pl)0
-rw-r--r--lib/unicode/In/54.pl (renamed from lib/unicode/In/MiscellaneousSymbols.pl)0
-rw-r--r--lib/unicode/In/55.pl (renamed from lib/unicode/In/Dingbats.pl)0
-rw-r--r--lib/unicode/In/56.pl (renamed from lib/unicode/In/BraillePatterns.pl)0
-rw-r--r--lib/unicode/In/57.pl (renamed from lib/unicode/In/CJKRadicalsSupplement.pl)0
-rw-r--r--lib/unicode/In/58.pl (renamed from lib/unicode/In/KangxiRadicals.pl)0
-rw-r--r--lib/unicode/In/59.pl (renamed from lib/unicode/In/IdeographicDescriptionCharacters.pl)0
-rw-r--r--lib/unicode/In/6.pl (renamed from lib/unicode/In/CombiningDiacriticalMarks.pl)0
-rw-r--r--lib/unicode/In/60.pl (renamed from lib/unicode/In/CJKSymbolsandPunctuation.pl)0
-rw-r--r--lib/unicode/In/61.pl (renamed from lib/unicode/In/Hiragana.pl)0
-rw-r--r--lib/unicode/In/62.pl (renamed from lib/unicode/In/Katakana.pl)0
-rw-r--r--lib/unicode/In/63.pl (renamed from lib/unicode/In/Bopomofo.pl)0
-rw-r--r--lib/unicode/In/64.pl (renamed from lib/unicode/In/HangulCompatibilityJamo.pl)0
-rw-r--r--lib/unicode/In/65.pl (renamed from lib/unicode/In/Kanbun.pl)0
-rw-r--r--lib/unicode/In/66.pl (renamed from lib/unicode/In/BopomofoExtended.pl)0
-rw-r--r--lib/unicode/In/67.pl (renamed from lib/unicode/In/EnclosedCJKLettersandMonths.pl)0
-rw-r--r--lib/unicode/In/68.pl (renamed from lib/unicode/In/CJKCompatibility.pl)0
-rw-r--r--lib/unicode/In/69.pl (renamed from lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl)0
-rw-r--r--lib/unicode/In/7.pl (renamed from lib/unicode/In/Greek.pl)0
-rw-r--r--lib/unicode/In/70.pl (renamed from lib/unicode/In/CJKUnifiedIdeographs.pl)0
-rw-r--r--lib/unicode/In/71.pl (renamed from lib/unicode/In/YiSyllables.pl)0
-rw-r--r--lib/unicode/In/72.pl (renamed from lib/unicode/In/YiRadicals.pl)0
-rw-r--r--lib/unicode/In/73.pl (renamed from lib/unicode/In/HangulSyllables.pl)0
-rw-r--r--lib/unicode/In/74.pl (renamed from lib/unicode/In/HighSurrogates.pl)0
-rw-r--r--lib/unicode/In/75.pl (renamed from lib/unicode/In/HighPrivateUseSurrogates.pl)0
-rw-r--r--lib/unicode/In/76.pl (renamed from lib/unicode/In/LowSurrogates.pl)0
-rw-r--r--lib/unicode/In/77.pl6
-rw-r--r--lib/unicode/In/78.pl (renamed from lib/unicode/In/CJKCompatibilityIdeographs.pl)0
-rw-r--r--lib/unicode/In/79.pl (renamed from lib/unicode/In/AlphabeticPresentationForms.pl)0
-rw-r--r--lib/unicode/In/8.pl (renamed from lib/unicode/In/Cyrillic.pl)0
-rw-r--r--lib/unicode/In/80.pl (renamed from lib/unicode/In/ArabicPresentationForms-A.pl)0
-rw-r--r--lib/unicode/In/81.pl (renamed from lib/unicode/In/CombiningHalfMarks.pl)0
-rw-r--r--lib/unicode/In/82.pl (renamed from lib/unicode/In/CJKCompatibilityForms.pl)0
-rw-r--r--lib/unicode/In/83.pl (renamed from lib/unicode/In/SmallFormVariants.pl)0
-rw-r--r--lib/unicode/In/84.pl (renamed from lib/unicode/In/ArabicPresentationForms-B.pl)0
-rw-r--r--lib/unicode/In/85.pl (renamed from lib/unicode/In/Specials.pl)0
-rw-r--r--lib/unicode/In/86.pl (renamed from lib/unicode/In/HalfwidthandFullwidthForms.pl)0
-rw-r--r--lib/unicode/In/87.pl (renamed from lib/unicode/Block.pl)1
-rw-r--r--lib/unicode/In/88.pl6
-rw-r--r--lib/unicode/In/89.pl6
-rw-r--r--lib/unicode/In/9.pl (renamed from lib/unicode/In/Armenian.pl)0
-rw-r--r--lib/unicode/In/90.pl6
-rw-r--r--lib/unicode/In/91.pl6
-rw-r--r--lib/unicode/In/92.pl6
-rw-r--r--lib/unicode/In/93.pl6
-rw-r--r--lib/unicode/In/94.pl6
-rw-r--r--lib/unicode/In/95.pl6
-rw-r--r--lib/unicode/In/PrivateUse.pl6
-rwxr-xr-xlib/unicode/mktables.PL51
-rw-r--r--lib/utf8_heavy.pl9
-rw-r--r--pod/perlunicode.pod9
103 files changed, 515 insertions, 105 deletions
diff --git a/MANIFEST b/MANIFEST
index 33e69ebb30..988302e628 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -888,7 +888,7 @@ lib/unicode/ArabLnkGrp.pl Unicode character database
lib/unicode/ArabShap.txt Unicode character database
lib/unicode/BidiMirr.txt Unicode character database
lib/unicode/Bidirectional.pl Unicode character database
-lib/unicode/Block.pl Unicode character database
+lib/unicode/Blocks.pl Unicode character database
lib/unicode/Blocks.txt Unicode character database
lib/unicode/CaseFold.txt Unicode character database
lib/unicode/Category.pl Unicode character database
@@ -896,93 +896,103 @@ lib/unicode/CombiningClass.pl Unicode character database
lib/unicode/CompExcl.txt Unicode character database
lib/unicode/Decomposition.pl Unicode character database
lib/unicode/EAWidth.txt Unicode character database
-lib/unicode/In/AlphabeticPresentationForms.pl Unicode character database
-lib/unicode/In/Arabic.pl Unicode character database
-lib/unicode/In/ArabicPresentationForms-A.pl Unicode character database
-lib/unicode/In/ArabicPresentationForms-B.pl Unicode character database
-lib/unicode/In/Armenian.pl Unicode character database
-lib/unicode/In/Arrows.pl Unicode character database
-lib/unicode/In/BasicLatin.pl Unicode character database
-lib/unicode/In/Bengali.pl Unicode character database
-lib/unicode/In/BlockElements.pl Unicode character database
-lib/unicode/In/Bopomofo.pl Unicode character database
-lib/unicode/In/BopomofoExtended.pl Unicode character database
-lib/unicode/In/BoxDrawing.pl Unicode character database
-lib/unicode/In/BraillePatterns.pl Unicode character database
-lib/unicode/In/CJKCompatibility.pl Unicode character database
-lib/unicode/In/CJKCompatibilityForms.pl Unicode character database
-lib/unicode/In/CJKCompatibilityIdeographs.pl Unicode character database
-lib/unicode/In/CJKRadicalsSupplement.pl Unicode character database
-lib/unicode/In/CJKSymbolsandPunctuation.pl Unicode character database
-lib/unicode/In/CJKUnifiedIdeographs.pl Unicode character database
-lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl Unicode character database
-lib/unicode/In/Cherokee.pl Unicode character database
-lib/unicode/In/CombiningDiacriticalMarks.pl Unicode character database
-lib/unicode/In/CombiningHalfMarks.pl Unicode character database
-lib/unicode/In/CombiningMarksforSymbols.pl Unicode character database
-lib/unicode/In/ControlPictures.pl Unicode character database
-lib/unicode/In/CurrencySymbols.pl Unicode character database
-lib/unicode/In/Cyrillic.pl Unicode character database
-lib/unicode/In/Devanagari.pl Unicode character database
-lib/unicode/In/Dingbats.pl Unicode character database
-lib/unicode/In/EnclosedAlphanumerics.pl Unicode character database
-lib/unicode/In/EnclosedCJKLettersandMonths.pl Unicode character database
-lib/unicode/In/Ethiopic.pl Unicode character database
-lib/unicode/In/GeneralPunctuation.pl Unicode character database
-lib/unicode/In/GeometricShapes.pl Unicode character database
-lib/unicode/In/Georgian.pl Unicode character database
-lib/unicode/In/Greek.pl Unicode character database
-lib/unicode/In/GreekExtended.pl Unicode character database
-lib/unicode/In/Gujarati.pl Unicode character database
-lib/unicode/In/Gurmukhi.pl Unicode character database
-lib/unicode/In/HalfwidthandFullwidthForms.pl Unicode character database
-lib/unicode/In/HangulCompatibilityJamo.pl Unicode character database
-lib/unicode/In/HangulJamo.pl Unicode character database
-lib/unicode/In/HangulSyllables.pl Unicode character database
-lib/unicode/In/Hebrew.pl Unicode character database
-lib/unicode/In/HighPrivateUseSurrogates.pl Unicode character database
-lib/unicode/In/HighSurrogates.pl Unicode character database
-lib/unicode/In/Hiragana.pl Unicode character database
-lib/unicode/In/IPAExtensions.pl Unicode character database
-lib/unicode/In/IdeographicDescriptionCharacters.pl Unicode character database
-lib/unicode/In/Kanbun.pl Unicode character database
-lib/unicode/In/KangxiRadicals.pl Unicode character database
-lib/unicode/In/Kannada.pl Unicode character database
-lib/unicode/In/Katakana.pl Unicode character database
-lib/unicode/In/Khmer.pl Unicode character database
-lib/unicode/In/Lao.pl Unicode character database
-lib/unicode/In/Latin-1Supplement.pl Unicode character database
-lib/unicode/In/LatinExtended-A.pl Unicode character database
-lib/unicode/In/LatinExtended-B.pl Unicode character database
-lib/unicode/In/LatinExtendedAdditional.pl Unicode character database
-lib/unicode/In/LetterlikeSymbols.pl Unicode character database
-lib/unicode/In/LowSurrogates.pl Unicode character database
-lib/unicode/In/Malayalam.pl Unicode character database
-lib/unicode/In/MathematicalOperators.pl Unicode character database
-lib/unicode/In/MiscellaneousSymbols.pl Unicode character database
-lib/unicode/In/MiscellaneousTechnical.pl Unicode character database
-lib/unicode/In/Mongolian.pl Unicode character database
-lib/unicode/In/Myanmar.pl Unicode character database
-lib/unicode/In/NumberForms.pl Unicode character database
-lib/unicode/In/Ogham.pl Unicode character database
-lib/unicode/In/OpticalCharacterRecognition.pl Unicode character database
-lib/unicode/In/Oriya.pl Unicode character database
-lib/unicode/In/PrivateUse.pl Unicode character database
-lib/unicode/In/Runic.pl Unicode character database
-lib/unicode/In/Sinhala.pl Unicode character database
-lib/unicode/In/SmallFormVariants.pl Unicode character database
-lib/unicode/In/SpacingModifierLetters.pl Unicode character database
-lib/unicode/In/Specials.pl Unicode character database
-lib/unicode/In/SuperscriptsandSubscripts.pl Unicode character database
-lib/unicode/In/Syriac.pl Unicode character database
-lib/unicode/In/Tamil.pl Unicode character database
-lib/unicode/In/Telugu.pl Unicode character database
-lib/unicode/In/Thaana.pl Unicode character database
-lib/unicode/In/Thai.pl Unicode character database
-lib/unicode/In/Tibetan.pl Unicode character database
-lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl Unicode character database
-lib/unicode/In/YiRadicals.pl Unicode character database
-lib/unicode/In/YiSyllables.pl Unicode character database
+lib/unicode/In.pl Unicode character database
+lib/unicode/In/0.pl Unicode character database
+lib/unicode/In/1.pl Unicode character database
+lib/unicode/In/2.pl Unicode character database
+lib/unicode/In/3.pl Unicode character database
+lib/unicode/In/4.pl Unicode character database
+lib/unicode/In/5.pl Unicode character database
+lib/unicode/In/6.pl Unicode character database
+lib/unicode/In/7.pl Unicode character database
+lib/unicode/In/8.pl Unicode character database
+lib/unicode/In/9.pl Unicode character database
+lib/unicode/In/10.pl Unicode character database
+lib/unicode/In/11.pl Unicode character database
+lib/unicode/In/12.pl Unicode character database
+lib/unicode/In/13.pl Unicode character database
+lib/unicode/In/14.pl Unicode character database
+lib/unicode/In/15.pl Unicode character database
+lib/unicode/In/16.pl Unicode character database
+lib/unicode/In/17.pl Unicode character database
+lib/unicode/In/18.pl Unicode character database
+lib/unicode/In/19.pl Unicode character database
+lib/unicode/In/20.pl Unicode character database
+lib/unicode/In/21.pl Unicode character database
+lib/unicode/In/22.pl Unicode character database
+lib/unicode/In/23.pl Unicode character database
+lib/unicode/In/24.pl Unicode character database
+lib/unicode/In/25.pl Unicode character database
+lib/unicode/In/26.pl Unicode character database
+lib/unicode/In/27.pl Unicode character database
+lib/unicode/In/28.pl Unicode character database
+lib/unicode/In/29.pl Unicode character database
+lib/unicode/In/30.pl Unicode character database
+lib/unicode/In/31.pl Unicode character database
+lib/unicode/In/32.pl Unicode character database
+lib/unicode/In/33.pl Unicode character database
+lib/unicode/In/34.pl Unicode character database
+lib/unicode/In/35.pl Unicode character database
+lib/unicode/In/36.pl Unicode character database
+lib/unicode/In/37.pl Unicode character database
+lib/unicode/In/38.pl Unicode character database
+lib/unicode/In/39.pl Unicode character database
+lib/unicode/In/40.pl Unicode character database
+lib/unicode/In/41.pl Unicode character database
+lib/unicode/In/42.pl Unicode character database
+lib/unicode/In/43.pl Unicode character database
+lib/unicode/In/44.pl Unicode character database
+lib/unicode/In/45.pl Unicode character database
+lib/unicode/In/46.pl Unicode character database
+lib/unicode/In/47.pl Unicode character database
+lib/unicode/In/48.pl Unicode character database
+lib/unicode/In/49.pl Unicode character database
+lib/unicode/In/50.pl Unicode character database
+lib/unicode/In/51.pl Unicode character database
+lib/unicode/In/52.pl Unicode character database
+lib/unicode/In/53.pl Unicode character database
+lib/unicode/In/54.pl Unicode character database
+lib/unicode/In/55.pl Unicode character database
+lib/unicode/In/56.pl Unicode character database
+lib/unicode/In/57.pl Unicode character database
+lib/unicode/In/58.pl Unicode character database
+lib/unicode/In/59.pl Unicode character database
+lib/unicode/In/60.pl Unicode character database
+lib/unicode/In/61.pl Unicode character database
+lib/unicode/In/62.pl Unicode character database
+lib/unicode/In/63.pl Unicode character database
+lib/unicode/In/64.pl Unicode character database
+lib/unicode/In/65.pl Unicode character database
+lib/unicode/In/66.pl Unicode character database
+lib/unicode/In/67.pl Unicode character database
+lib/unicode/In/68.pl Unicode character database
+lib/unicode/In/69.pl Unicode character database
+lib/unicode/In/70.pl Unicode character database
+lib/unicode/In/71.pl Unicode character database
+lib/unicode/In/72.pl Unicode character database
+lib/unicode/In/73.pl Unicode character database
+lib/unicode/In/74.pl Unicode character database
+lib/unicode/In/75.pl Unicode character database
+lib/unicode/In/76.pl Unicode character database
+lib/unicode/In/77.pl Unicode character database
+lib/unicode/In/78.pl Unicode character database
+lib/unicode/In/79.pl Unicode character database
+lib/unicode/In/80.pl Unicode character database
+lib/unicode/In/81.pl Unicode character database
+lib/unicode/In/82.pl Unicode character database
+lib/unicode/In/83.pl Unicode character database
+lib/unicode/In/84.pl Unicode character database
+lib/unicode/In/85.pl Unicode character database
+lib/unicode/In/86.pl Unicode character database
+lib/unicode/In/87.pl Unicode character database
+lib/unicode/In/88.pl Unicode character database
+lib/unicode/In/89.pl Unicode character database
+lib/unicode/In/90.pl Unicode character database
+lib/unicode/In/91.pl Unicode character database
+lib/unicode/In/92.pl Unicode character database
+lib/unicode/In/93.pl Unicode character database
+lib/unicode/In/94.pl Unicode character database
+lib/unicode/In/95.pl Unicode character database
lib/unicode/Index.txt Unicode character database
lib/unicode/Is/ASCII.pl Unicode character database
lib/unicode/Is/Alnum.pl Unicode character database
diff --git a/lib/unicode/Blocks.pl b/lib/unicode/Blocks.pl
new file mode 100644
index 0000000000..ef60058ba3
--- /dev/null
+++ b/lib/unicode/Blocks.pl
@@ -0,0 +1,203 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+0000 007F Basic Latin
+# In/0.pl BasicLatin
+0080 00FF Latin-1 Supplement
+# In/1.pl Latin1Supplement
+0100 017F Latin Extended-A
+# In/2.pl LatinExtendedA
+0180 024F Latin Extended-B
+# In/3.pl LatinExtendedB
+0250 02AF IPA Extensions
+# In/4.pl IPAExtensions
+02B0 02FF Spacing Modifier Letters
+# In/5.pl SpacingModifierLetters
+0300 036F Combining Diacritical Marks
+# In/6.pl CombiningDiacriticalMarks
+0370 03FF Greek
+# In/7.pl Greek
+0400 04FF Cyrillic
+# In/8.pl Cyrillic
+0530 058F Armenian
+# In/9.pl Armenian
+0590 05FF Hebrew
+# In/10.pl Hebrew
+0600 06FF Arabic
+# In/11.pl Arabic
+0700 074F Syriac
+# In/12.pl Syriac
+0780 07BF Thaana
+# In/13.pl Thaana
+0900 097F Devanagari
+# In/14.pl Devanagari
+0980 09FF Bengali
+# In/15.pl Bengali
+0A00 0A7F Gurmukhi
+# In/16.pl Gurmukhi
+0A80 0AFF Gujarati
+# In/17.pl Gujarati
+0B00 0B7F Oriya
+# In/18.pl Oriya
+0B80 0BFF Tamil
+# In/19.pl Tamil
+0C00 0C7F Telugu
+# In/20.pl Telugu
+0C80 0CFF Kannada
+# In/21.pl Kannada
+0D00 0D7F Malayalam
+# In/22.pl Malayalam
+0D80 0DFF Sinhala
+# In/23.pl Sinhala
+0E00 0E7F Thai
+# In/24.pl Thai
+0E80 0EFF Lao
+# In/25.pl Lao
+0F00 0FFF Tibetan
+# In/26.pl Tibetan
+1000 109F Myanmar
+# In/27.pl Myanmar
+10A0 10FF Georgian
+# In/28.pl Georgian
+1100 11FF Hangul Jamo
+# In/29.pl HangulJamo
+1200 137F Ethiopic
+# In/30.pl Ethiopic
+13A0 13FF Cherokee
+# In/31.pl Cherokee
+1400 167F Unified Canadian Aboriginal Syllabics
+# In/32.pl UnifiedCanadianAboriginalSyllabics
+1680 169F Ogham
+# In/33.pl Ogham
+16A0 16FF Runic
+# In/34.pl Runic
+1780 17FF Khmer
+# In/35.pl Khmer
+1800 18AF Mongolian
+# In/36.pl Mongolian
+1E00 1EFF Latin Extended Additional
+# In/37.pl LatinExtendedAdditional
+1F00 1FFF Greek Extended
+# In/38.pl GreekExtended
+2000 206F General Punctuation
+# In/39.pl GeneralPunctuation
+2070 209F Superscripts and Subscripts
+# In/40.pl SuperscriptsandSubscripts
+20A0 20CF Currency Symbols
+# In/41.pl CurrencySymbols
+20D0 20FF Combining Marks for Symbols
+# In/42.pl CombiningMarksforSymbols
+2100 214F Letterlike Symbols
+# In/43.pl LetterlikeSymbols
+2150 218F Number Forms
+# In/44.pl NumberForms
+2190 21FF Arrows
+# In/45.pl Arrows
+2200 22FF Mathematical Operators
+# In/46.pl MathematicalOperators
+2300 23FF Miscellaneous Technical
+# In/47.pl MiscellaneousTechnical
+2400 243F Control Pictures
+# In/48.pl ControlPictures
+2440 245F Optical Character Recognition
+# In/49.pl OpticalCharacterRecognition
+2460 24FF Enclosed Alphanumerics
+# In/50.pl EnclosedAlphanumerics
+2500 257F Box Drawing
+# In/51.pl BoxDrawing
+2580 259F Block Elements
+# In/52.pl BlockElements
+25A0 25FF Geometric Shapes
+# In/53.pl GeometricShapes
+2600 26FF Miscellaneous Symbols
+# In/54.pl MiscellaneousSymbols
+2700 27BF Dingbats
+# In/55.pl Dingbats
+2800 28FF Braille Patterns
+# In/56.pl BraillePatterns
+2E80 2EFF CJK Radicals Supplement
+# In/57.pl CJKRadicalsSupplement
+2F00 2FDF Kangxi Radicals
+# In/58.pl KangxiRadicals
+2FF0 2FFF Ideographic Description Characters
+# In/59.pl IdeographicDescriptionCharacters
+3000 303F CJK Symbols and Punctuation
+# In/60.pl CJKSymbolsandPunctuation
+3040 309F Hiragana
+# In/61.pl Hiragana
+30A0 30FF Katakana
+# In/62.pl Katakana
+3100 312F Bopomofo
+# In/63.pl Bopomofo
+3130 318F Hangul Compatibility Jamo
+# In/64.pl HangulCompatibilityJamo
+3190 319F Kanbun
+# In/65.pl Kanbun
+31A0 31BF Bopomofo Extended
+# In/66.pl BopomofoExtended
+3200 32FF Enclosed CJK Letters and Months
+# In/67.pl EnclosedCJKLettersandMonths
+3300 33FF CJK Compatibility
+# In/68.pl CJKCompatibility
+3400 4DB5 CJK Unified Ideographs Extension A
+# In/69.pl CJKUnifiedIdeographsExtensionA
+4E00 9FFF CJK Unified Ideographs
+# In/70.pl CJKUnifiedIdeographs
+A000 A48F Yi Syllables
+# In/71.pl YiSyllables
+A490 A4CF Yi Radicals
+# In/72.pl YiRadicals
+AC00 D7A3 Hangul Syllables
+# In/73.pl HangulSyllables
+D800 DB7F High Surrogates
+# In/74.pl HighSurrogates
+DB80 DBFF High Private Use Surrogates
+# In/75.pl HighPrivateUseSurrogates
+DC00 DFFF Low Surrogates
+# In/76.pl LowSurrogates
+E000 F8FF Private Use
+# In/77.pl PrivateUse
+F900 FAFF CJK Compatibility Ideographs
+# In/78.pl CJKCompatibilityIdeographs
+FB00 FB4F Alphabetic Presentation Forms
+# In/79.pl AlphabeticPresentationForms
+FB50 FDFF Arabic Presentation Forms-A
+# In/80.pl ArabicPresentationFormsA
+FE20 FE2F Combining Half Marks
+# In/81.pl CombiningHalfMarks
+FE30 FE4F CJK Compatibility Forms
+# In/82.pl CJKCompatibilityForms
+FE50 FE6F Small Form Variants
+# In/83.pl SmallFormVariants
+FE70 FEFE Arabic Presentation Forms-B
+# In/84.pl ArabicPresentationFormsB
+FEFF FEFF Specials
+# In/85.pl Specials
+FF00 FFEF Halfwidth and Fullwidth Forms
+# In/86.pl HalfwidthandFullwidthForms
+FFF0 FFFD Specials
+# In/85.pl Specials
+10300 1032F Old Italic
+# In/87.pl OldItalic
+10330 1034F Gothic
+# In/88.pl Gothic
+10400 1044F Deseret
+# In/89.pl Deseret
+1D000 1D0FF Byzantine Musical Symbols
+# In/90.pl ByzantineMusicalSymbols
+1D100 1D1FF Musical Symbols
+# In/91.pl MusicalSymbols
+1D400 1D7FF Mathematical Alphanumeric Symbols
+# In/92.pl MathematicalAlphanumericSymbols
+20000 2A6D6 CJK Unified Ideographs Extension B
+# In/93.pl CJKUnifiedIdeographsExtensionB
+2F800 2FA1F CJK Compatibility Ideographs Supplement
+# In/94.pl CJKCompatibilityIdeographsSupplement
+E0000 E007F Tags
+# In/95.pl Tags
+F0000 FFFFD Private Use
+# In/77.pl PrivateUse
+100000 10FFFD Private Use
+# In/77.pl PrivateUse
+END
diff --git a/lib/unicode/In.pl b/lib/unicode/In.pl
new file mode 100644
index 0000000000..eefec27310
--- /dev/null
+++ b/lib/unicode/In.pl
@@ -0,0 +1,101 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+%utf8::In = (
+'BasicLatin' => 0,
+'Latin1Supplement' => 1,
+'Hebrew' => 10,
+'Arabic' => 11,
+'Syriac' => 12,
+'Thaana' => 13,
+'Devanagari' => 14,
+'Bengali' => 15,
+'Gurmukhi' => 16,
+'Gujarati' => 17,
+'Oriya' => 18,
+'Tamil' => 19,
+'LatinExtendedA' => 2,
+'Telugu' => 20,
+'Kannada' => 21,
+'Malayalam' => 22,
+'Sinhala' => 23,
+'Thai' => 24,
+'Lao' => 25,
+'Tibetan' => 26,
+'Myanmar' => 27,
+'Georgian' => 28,
+'HangulJamo' => 29,
+'LatinExtendedB' => 3,
+'Ethiopic' => 30,
+'Cherokee' => 31,
+'UnifiedCanadianAboriginalSyllabics' => 32,
+'Ogham' => 33,
+'Runic' => 34,
+'Khmer' => 35,
+'Mongolian' => 36,
+'LatinExtendedAdditional' => 37,
+'GreekExtended' => 38,
+'GeneralPunctuation' => 39,
+'IPAExtensions' => 4,
+'SuperscriptsandSubscripts' => 40,
+'CurrencySymbols' => 41,
+'CombiningMarksforSymbols' => 42,
+'LetterlikeSymbols' => 43,
+'NumberForms' => 44,
+'Arrows' => 45,
+'MathematicalOperators' => 46,
+'MiscellaneousTechnical' => 47,
+'ControlPictures' => 48,
+'OpticalCharacterRecognition' => 49,
+'SpacingModifierLetters' => 5,
+'EnclosedAlphanumerics' => 50,
+'BoxDrawing' => 51,
+'BlockElements' => 52,
+'GeometricShapes' => 53,
+'MiscellaneousSymbols' => 54,
+'Dingbats' => 55,
+'BraillePatterns' => 56,
+'CJKRadicalsSupplement' => 57,
+'KangxiRadicals' => 58,
+'IdeographicDescriptionCharacters' => 59,
+'CombiningDiacriticalMarks' => 6,
+'CJKSymbolsandPunctuation' => 60,
+'Hiragana' => 61,
+'Katakana' => 62,
+'Bopomofo' => 63,
+'HangulCompatibilityJamo' => 64,
+'Kanbun' => 65,
+'BopomofoExtended' => 66,
+'EnclosedCJKLettersandMonths' => 67,
+'CJKCompatibility' => 68,
+'CJKUnifiedIdeographsExtensionA' => 69,
+'Greek' => 7,
+'CJKUnifiedIdeographs' => 70,
+'YiSyllables' => 71,
+'YiRadicals' => 72,
+'HangulSyllables' => 73,
+'HighSurrogates' => 74,
+'HighPrivateUseSurrogates' => 75,
+'LowSurrogates' => 76,
+'PrivateUse' => 77,
+'CJKCompatibilityIdeographs' => 78,
+'AlphabeticPresentationForms' => 79,
+'Cyrillic' => 8,
+'ArabicPresentationFormsA' => 80,
+'CombiningHalfMarks' => 81,
+'CJKCompatibilityForms' => 82,
+'SmallFormVariants' => 83,
+'ArabicPresentationFormsB' => 84,
+'Specials' => 85,
+'HalfwidthandFullwidthForms' => 86,
+'OldItalic' => 87,
+'Gothic' => 88,
+'Deseret' => 89,
+'Armenian' => 9,
+'ByzantineMusicalSymbols' => 90,
+'MusicalSymbols' => 91,
+'MathematicalAlphanumericSymbols' => 92,
+'CJKUnifiedIdeographsExtensionB' => 93,
+'CJKCompatibilityIdeographsSupplement' => 94,
+'Tags' => 95,
+);
diff --git a/lib/unicode/In/BasicLatin.pl b/lib/unicode/In/0.pl
index 475c1dfed0..475c1dfed0 100644
--- a/lib/unicode/In/BasicLatin.pl
+++ b/lib/unicode/In/0.pl
diff --git a/lib/unicode/In/Latin-1Supplement.pl b/lib/unicode/In/1.pl
index 5a5aa0e18c..5a5aa0e18c 100644
--- a/lib/unicode/In/Latin-1Supplement.pl
+++ b/lib/unicode/In/1.pl
diff --git a/lib/unicode/In/Hebrew.pl b/lib/unicode/In/10.pl
index f1d866c049..f1d866c049 100644
--- a/lib/unicode/In/Hebrew.pl
+++ b/lib/unicode/In/10.pl
diff --git a/lib/unicode/In/Arabic.pl b/lib/unicode/In/11.pl
index 7546a743b5..7546a743b5 100644
--- a/lib/unicode/In/Arabic.pl
+++ b/lib/unicode/In/11.pl
diff --git a/lib/unicode/In/Syriac.pl b/lib/unicode/In/12.pl
index e5247ad937..e5247ad937 100644
--- a/lib/unicode/In/Syriac.pl
+++ b/lib/unicode/In/12.pl
diff --git a/lib/unicode/In/Thaana.pl b/lib/unicode/In/13.pl
index 5bda401f7b..5bda401f7b 100644
--- a/lib/unicode/In/Thaana.pl
+++ b/lib/unicode/In/13.pl
diff --git a/lib/unicode/In/Devanagari.pl b/lib/unicode/In/14.pl
index a20b68d031..a20b68d031 100644
--- a/lib/unicode/In/Devanagari.pl
+++ b/lib/unicode/In/14.pl
diff --git a/lib/unicode/In/Bengali.pl b/lib/unicode/In/15.pl
index 306f653dbc..306f653dbc 100644
--- a/lib/unicode/In/Bengali.pl
+++ b/lib/unicode/In/15.pl
diff --git a/lib/unicode/In/Gurmukhi.pl b/lib/unicode/In/16.pl
index d37d4849ca..d37d4849ca 100644
--- a/lib/unicode/In/Gurmukhi.pl
+++ b/lib/unicode/In/16.pl
diff --git a/lib/unicode/In/Gujarati.pl b/lib/unicode/In/17.pl
index 65d853b314..65d853b314 100644
--- a/lib/unicode/In/Gujarati.pl
+++ b/lib/unicode/In/17.pl
diff --git a/lib/unicode/In/Oriya.pl b/lib/unicode/In/18.pl
index 14e1027fb6..14e1027fb6 100644
--- a/lib/unicode/In/Oriya.pl
+++ b/lib/unicode/In/18.pl
diff --git a/lib/unicode/In/Tamil.pl b/lib/unicode/In/19.pl
index a28ba3d909..a28ba3d909 100644
--- a/lib/unicode/In/Tamil.pl
+++ b/lib/unicode/In/19.pl
diff --git a/lib/unicode/In/LatinExtended-A.pl b/lib/unicode/In/2.pl
index 0f6acf9853..0f6acf9853 100644
--- a/lib/unicode/In/LatinExtended-A.pl
+++ b/lib/unicode/In/2.pl
diff --git a/lib/unicode/In/Telugu.pl b/lib/unicode/In/20.pl
index aff6cc93f4..aff6cc93f4 100644
--- a/lib/unicode/In/Telugu.pl
+++ b/lib/unicode/In/20.pl
diff --git a/lib/unicode/In/Kannada.pl b/lib/unicode/In/21.pl
index 41e05bdc3b..41e05bdc3b 100644
--- a/lib/unicode/In/Kannada.pl
+++ b/lib/unicode/In/21.pl
diff --git a/lib/unicode/In/Malayalam.pl b/lib/unicode/In/22.pl
index b42bbeea8d..b42bbeea8d 100644
--- a/lib/unicode/In/Malayalam.pl
+++ b/lib/unicode/In/22.pl
diff --git a/lib/unicode/In/Sinhala.pl b/lib/unicode/In/23.pl
index 00da6d144f..00da6d144f 100644
--- a/lib/unicode/In/Sinhala.pl
+++ b/lib/unicode/In/23.pl
diff --git a/lib/unicode/In/Thai.pl b/lib/unicode/In/24.pl
index 2fa00eb135..2fa00eb135 100644
--- a/lib/unicode/In/Thai.pl
+++ b/lib/unicode/In/24.pl
diff --git a/lib/unicode/In/Lao.pl b/lib/unicode/In/25.pl
index 5fd607c08f..5fd607c08f 100644
--- a/lib/unicode/In/Lao.pl
+++ b/lib/unicode/In/25.pl
diff --git a/lib/unicode/In/Tibetan.pl b/lib/unicode/In/26.pl
index 3ae5e6248d..3ae5e6248d 100644
--- a/lib/unicode/In/Tibetan.pl
+++ b/lib/unicode/In/26.pl
diff --git a/lib/unicode/In/Myanmar.pl b/lib/unicode/In/27.pl
index ecc3448361..ecc3448361 100644
--- a/lib/unicode/In/Myanmar.pl
+++ b/lib/unicode/In/27.pl
diff --git a/lib/unicode/In/Georgian.pl b/lib/unicode/In/28.pl
index 73a8818c71..73a8818c71 100644
--- a/lib/unicode/In/Georgian.pl
+++ b/lib/unicode/In/28.pl
diff --git a/lib/unicode/In/HangulJamo.pl b/lib/unicode/In/29.pl
index 692be7d813..692be7d813 100644
--- a/lib/unicode/In/HangulJamo.pl
+++ b/lib/unicode/In/29.pl
diff --git a/lib/unicode/In/LatinExtended-B.pl b/lib/unicode/In/3.pl
index 68f093234e..68f093234e 100644
--- a/lib/unicode/In/LatinExtended-B.pl
+++ b/lib/unicode/In/3.pl
diff --git a/lib/unicode/In/Ethiopic.pl b/lib/unicode/In/30.pl
index ad4776df99..ad4776df99 100644
--- a/lib/unicode/In/Ethiopic.pl
+++ b/lib/unicode/In/30.pl
diff --git a/lib/unicode/In/Cherokee.pl b/lib/unicode/In/31.pl
index f40dfa2be0..f40dfa2be0 100644
--- a/lib/unicode/In/Cherokee.pl
+++ b/lib/unicode/In/31.pl
diff --git a/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl b/lib/unicode/In/32.pl
index 7318008076..7318008076 100644
--- a/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl
+++ b/lib/unicode/In/32.pl
diff --git a/lib/unicode/In/Ogham.pl b/lib/unicode/In/33.pl
index 5d7bd970e8..5d7bd970e8 100644
--- a/lib/unicode/In/Ogham.pl
+++ b/lib/unicode/In/33.pl
diff --git a/lib/unicode/In/Runic.pl b/lib/unicode/In/34.pl
index d404cb6cfb..d404cb6cfb 100644
--- a/lib/unicode/In/Runic.pl
+++ b/lib/unicode/In/34.pl
diff --git a/lib/unicode/In/Khmer.pl b/lib/unicode/In/35.pl
index 2b0b198216..2b0b198216 100644
--- a/lib/unicode/In/Khmer.pl
+++ b/lib/unicode/In/35.pl
diff --git a/lib/unicode/In/Mongolian.pl b/lib/unicode/In/36.pl
index 06526c64d9..06526c64d9 100644
--- a/lib/unicode/In/Mongolian.pl
+++ b/lib/unicode/In/36.pl
diff --git a/lib/unicode/In/LatinExtendedAdditional.pl b/lib/unicode/In/37.pl
index c288810ca2..c288810ca2 100644
--- a/lib/unicode/In/LatinExtendedAdditional.pl
+++ b/lib/unicode/In/37.pl
diff --git a/lib/unicode/In/GreekExtended.pl b/lib/unicode/In/38.pl
index 74cd2c88e0..74cd2c88e0 100644
--- a/lib/unicode/In/GreekExtended.pl
+++ b/lib/unicode/In/38.pl
diff --git a/lib/unicode/In/GeneralPunctuation.pl b/lib/unicode/In/39.pl
index b9b0e7efaa..b9b0e7efaa 100644
--- a/lib/unicode/In/GeneralPunctuation.pl
+++ b/lib/unicode/In/39.pl
diff --git a/lib/unicode/In/IPAExtensions.pl b/lib/unicode/In/4.pl
index f6e9454fe0..f6e9454fe0 100644
--- a/lib/unicode/In/IPAExtensions.pl
+++ b/lib/unicode/In/4.pl
diff --git a/lib/unicode/In/SuperscriptsandSubscripts.pl b/lib/unicode/In/40.pl
index 2e36ac331c..2e36ac331c 100644
--- a/lib/unicode/In/SuperscriptsandSubscripts.pl
+++ b/lib/unicode/In/40.pl
diff --git a/lib/unicode/In/CurrencySymbols.pl b/lib/unicode/In/41.pl
index 12c67371cc..12c67371cc 100644
--- a/lib/unicode/In/CurrencySymbols.pl
+++ b/lib/unicode/In/41.pl
diff --git a/lib/unicode/In/CombiningMarksforSymbols.pl b/lib/unicode/In/42.pl
index 2d58a56712..2d58a56712 100644
--- a/lib/unicode/In/CombiningMarksforSymbols.pl
+++ b/lib/unicode/In/42.pl
diff --git a/lib/unicode/In/LetterlikeSymbols.pl b/lib/unicode/In/43.pl
index c735821edc..c735821edc 100644
--- a/lib/unicode/In/LetterlikeSymbols.pl
+++ b/lib/unicode/In/43.pl
diff --git a/lib/unicode/In/NumberForms.pl b/lib/unicode/In/44.pl
index a1949a194d..a1949a194d 100644
--- a/lib/unicode/In/NumberForms.pl
+++ b/lib/unicode/In/44.pl
diff --git a/lib/unicode/In/Arrows.pl b/lib/unicode/In/45.pl
index 799f739085..799f739085 100644
--- a/lib/unicode/In/Arrows.pl
+++ b/lib/unicode/In/45.pl
diff --git a/lib/unicode/In/MathematicalOperators.pl b/lib/unicode/In/46.pl
index 8bc8295cc5..8bc8295cc5 100644
--- a/lib/unicode/In/MathematicalOperators.pl
+++ b/lib/unicode/In/46.pl
diff --git a/lib/unicode/In/MiscellaneousTechnical.pl b/lib/unicode/In/47.pl
index 67867951d6..67867951d6 100644
--- a/lib/unicode/In/MiscellaneousTechnical.pl
+++ b/lib/unicode/In/47.pl
diff --git a/lib/unicode/In/ControlPictures.pl b/lib/unicode/In/48.pl
index 7aad2fcacf..7aad2fcacf 100644
--- a/lib/unicode/In/ControlPictures.pl
+++ b/lib/unicode/In/48.pl
diff --git a/lib/unicode/In/OpticalCharacterRecognition.pl b/lib/unicode/In/49.pl
index c7cecd02da..c7cecd02da 100644
--- a/lib/unicode/In/OpticalCharacterRecognition.pl
+++ b/lib/unicode/In/49.pl
diff --git a/lib/unicode/In/SpacingModifierLetters.pl b/lib/unicode/In/5.pl
index a242e0207a..a242e0207a 100644
--- a/lib/unicode/In/SpacingModifierLetters.pl
+++ b/lib/unicode/In/5.pl
diff --git a/lib/unicode/In/EnclosedAlphanumerics.pl b/lib/unicode/In/50.pl
index 7b1b778af0..7b1b778af0 100644
--- a/lib/unicode/In/EnclosedAlphanumerics.pl
+++ b/lib/unicode/In/50.pl
diff --git a/lib/unicode/In/BoxDrawing.pl b/lib/unicode/In/51.pl
index 4d446863fe..4d446863fe 100644
--- a/lib/unicode/In/BoxDrawing.pl
+++ b/lib/unicode/In/51.pl
diff --git a/lib/unicode/In/BlockElements.pl b/lib/unicode/In/52.pl
index 6135c93e90..6135c93e90 100644
--- a/lib/unicode/In/BlockElements.pl
+++ b/lib/unicode/In/52.pl
diff --git a/lib/unicode/In/GeometricShapes.pl b/lib/unicode/In/53.pl
index 855d98ebff..855d98ebff 100644
--- a/lib/unicode/In/GeometricShapes.pl
+++ b/lib/unicode/In/53.pl
diff --git a/lib/unicode/In/MiscellaneousSymbols.pl b/lib/unicode/In/54.pl
index 0949bc2b55..0949bc2b55 100644
--- a/lib/unicode/In/MiscellaneousSymbols.pl
+++ b/lib/unicode/In/54.pl
diff --git a/lib/unicode/In/Dingbats.pl b/lib/unicode/In/55.pl
index 3013f73c75..3013f73c75 100644
--- a/lib/unicode/In/Dingbats.pl
+++ b/lib/unicode/In/55.pl
diff --git a/lib/unicode/In/BraillePatterns.pl b/lib/unicode/In/56.pl
index d785c31676..d785c31676 100644
--- a/lib/unicode/In/BraillePatterns.pl
+++ b/lib/unicode/In/56.pl
diff --git a/lib/unicode/In/CJKRadicalsSupplement.pl b/lib/unicode/In/57.pl
index 2bf56517d1..2bf56517d1 100644
--- a/lib/unicode/In/CJKRadicalsSupplement.pl
+++ b/lib/unicode/In/57.pl
diff --git a/lib/unicode/In/KangxiRadicals.pl b/lib/unicode/In/58.pl
index 3903f15c4c..3903f15c4c 100644
--- a/lib/unicode/In/KangxiRadicals.pl
+++ b/lib/unicode/In/58.pl
diff --git a/lib/unicode/In/IdeographicDescriptionCharacters.pl b/lib/unicode/In/59.pl
index 07799e6941..07799e6941 100644
--- a/lib/unicode/In/IdeographicDescriptionCharacters.pl
+++ b/lib/unicode/In/59.pl
diff --git a/lib/unicode/In/CombiningDiacriticalMarks.pl b/lib/unicode/In/6.pl
index cf9bb94991..cf9bb94991 100644
--- a/lib/unicode/In/CombiningDiacriticalMarks.pl
+++ b/lib/unicode/In/6.pl
diff --git a/lib/unicode/In/CJKSymbolsandPunctuation.pl b/lib/unicode/In/60.pl
index 0c66f051a4..0c66f051a4 100644
--- a/lib/unicode/In/CJKSymbolsandPunctuation.pl
+++ b/lib/unicode/In/60.pl
diff --git a/lib/unicode/In/Hiragana.pl b/lib/unicode/In/61.pl
index 49b4e4976a..49b4e4976a 100644
--- a/lib/unicode/In/Hiragana.pl
+++ b/lib/unicode/In/61.pl
diff --git a/lib/unicode/In/Katakana.pl b/lib/unicode/In/62.pl
index e5568a283a..e5568a283a 100644
--- a/lib/unicode/In/Katakana.pl
+++ b/lib/unicode/In/62.pl
diff --git a/lib/unicode/In/Bopomofo.pl b/lib/unicode/In/63.pl
index 4f9b5f46b2..4f9b5f46b2 100644
--- a/lib/unicode/In/Bopomofo.pl
+++ b/lib/unicode/In/63.pl
diff --git a/lib/unicode/In/HangulCompatibilityJamo.pl b/lib/unicode/In/64.pl
index b15c4cc760..b15c4cc760 100644
--- a/lib/unicode/In/HangulCompatibilityJamo.pl
+++ b/lib/unicode/In/64.pl
diff --git a/lib/unicode/In/Kanbun.pl b/lib/unicode/In/65.pl
index d78c2088c0..d78c2088c0 100644
--- a/lib/unicode/In/Kanbun.pl
+++ b/lib/unicode/In/65.pl
diff --git a/lib/unicode/In/BopomofoExtended.pl b/lib/unicode/In/66.pl
index 96150b4f3f..96150b4f3f 100644
--- a/lib/unicode/In/BopomofoExtended.pl
+++ b/lib/unicode/In/66.pl
diff --git a/lib/unicode/In/EnclosedCJKLettersandMonths.pl b/lib/unicode/In/67.pl
index 2708fec7e3..2708fec7e3 100644
--- a/lib/unicode/In/EnclosedCJKLettersandMonths.pl
+++ b/lib/unicode/In/67.pl
diff --git a/lib/unicode/In/CJKCompatibility.pl b/lib/unicode/In/68.pl
index d504529398..d504529398 100644
--- a/lib/unicode/In/CJKCompatibility.pl
+++ b/lib/unicode/In/68.pl
diff --git a/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl b/lib/unicode/In/69.pl
index 83adb815d7..83adb815d7 100644
--- a/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl
+++ b/lib/unicode/In/69.pl
diff --git a/lib/unicode/In/Greek.pl b/lib/unicode/In/7.pl
index 8d89b7176b..8d89b7176b 100644
--- a/lib/unicode/In/Greek.pl
+++ b/lib/unicode/In/7.pl
diff --git a/lib/unicode/In/CJKUnifiedIdeographs.pl b/lib/unicode/In/70.pl
index f74552e661..f74552e661 100644
--- a/lib/unicode/In/CJKUnifiedIdeographs.pl
+++ b/lib/unicode/In/70.pl
diff --git a/lib/unicode/In/YiSyllables.pl b/lib/unicode/In/71.pl
index 0636a82e56..0636a82e56 100644
--- a/lib/unicode/In/YiSyllables.pl
+++ b/lib/unicode/In/71.pl
diff --git a/lib/unicode/In/YiRadicals.pl b/lib/unicode/In/72.pl
index 56404c5fb5..56404c5fb5 100644
--- a/lib/unicode/In/YiRadicals.pl
+++ b/lib/unicode/In/72.pl
diff --git a/lib/unicode/In/HangulSyllables.pl b/lib/unicode/In/73.pl
index e1e26945e5..e1e26945e5 100644
--- a/lib/unicode/In/HangulSyllables.pl
+++ b/lib/unicode/In/73.pl
diff --git a/lib/unicode/In/HighSurrogates.pl b/lib/unicode/In/74.pl
index 0f4eb5727b..0f4eb5727b 100644
--- a/lib/unicode/In/HighSurrogates.pl
+++ b/lib/unicode/In/74.pl
diff --git a/lib/unicode/In/HighPrivateUseSurrogates.pl b/lib/unicode/In/75.pl
index ec4ca07885..ec4ca07885 100644
--- a/lib/unicode/In/HighPrivateUseSurrogates.pl
+++ b/lib/unicode/In/75.pl
diff --git a/lib/unicode/In/LowSurrogates.pl b/lib/unicode/In/76.pl
index d056168c66..d056168c66 100644
--- a/lib/unicode/In/LowSurrogates.pl
+++ b/lib/unicode/In/76.pl
diff --git a/lib/unicode/In/77.pl b/lib/unicode/In/77.pl
new file mode 100644
index 0000000000..530166da95
--- /dev/null
+++ b/lib/unicode/In/77.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+100000 10FFFD
+END
diff --git a/lib/unicode/In/CJKCompatibilityIdeographs.pl b/lib/unicode/In/78.pl
index 0c553d6ee2..0c553d6ee2 100644
--- a/lib/unicode/In/CJKCompatibilityIdeographs.pl
+++ b/lib/unicode/In/78.pl
diff --git a/lib/unicode/In/AlphabeticPresentationForms.pl b/lib/unicode/In/79.pl
index 42cc1ca029..42cc1ca029 100644
--- a/lib/unicode/In/AlphabeticPresentationForms.pl
+++ b/lib/unicode/In/79.pl
diff --git a/lib/unicode/In/Cyrillic.pl b/lib/unicode/In/8.pl
index 0075ce1ddf..0075ce1ddf 100644
--- a/lib/unicode/In/Cyrillic.pl
+++ b/lib/unicode/In/8.pl
diff --git a/lib/unicode/In/ArabicPresentationForms-A.pl b/lib/unicode/In/80.pl
index ffb4f1eb3a..ffb4f1eb3a 100644
--- a/lib/unicode/In/ArabicPresentationForms-A.pl
+++ b/lib/unicode/In/80.pl
diff --git a/lib/unicode/In/CombiningHalfMarks.pl b/lib/unicode/In/81.pl
index cc8a4a21b1..cc8a4a21b1 100644
--- a/lib/unicode/In/CombiningHalfMarks.pl
+++ b/lib/unicode/In/81.pl
diff --git a/lib/unicode/In/CJKCompatibilityForms.pl b/lib/unicode/In/82.pl
index 4e462b8402..4e462b8402 100644
--- a/lib/unicode/In/CJKCompatibilityForms.pl
+++ b/lib/unicode/In/82.pl
diff --git a/lib/unicode/In/SmallFormVariants.pl b/lib/unicode/In/83.pl
index 4eff1ea01e..4eff1ea01e 100644
--- a/lib/unicode/In/SmallFormVariants.pl
+++ b/lib/unicode/In/83.pl
diff --git a/lib/unicode/In/ArabicPresentationForms-B.pl b/lib/unicode/In/84.pl
index dc5a32e4b1..dc5a32e4b1 100644
--- a/lib/unicode/In/ArabicPresentationForms-B.pl
+++ b/lib/unicode/In/84.pl
diff --git a/lib/unicode/In/Specials.pl b/lib/unicode/In/85.pl
index 931fc5b902..931fc5b902 100644
--- a/lib/unicode/In/Specials.pl
+++ b/lib/unicode/In/85.pl
diff --git a/lib/unicode/In/HalfwidthandFullwidthForms.pl b/lib/unicode/In/86.pl
index 03e85154fb..03e85154fb 100644
--- a/lib/unicode/In/HalfwidthandFullwidthForms.pl
+++ b/lib/unicode/In/86.pl
diff --git a/lib/unicode/Block.pl b/lib/unicode/In/87.pl
index 272f63fc9f..44a5e47510 100644
--- a/lib/unicode/Block.pl
+++ b/lib/unicode/In/87.pl
@@ -2,4 +2,5 @@
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
+10300 1032F
END
diff --git a/lib/unicode/In/88.pl b/lib/unicode/In/88.pl
new file mode 100644
index 0000000000..803041101c
--- /dev/null
+++ b/lib/unicode/In/88.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+10330 1034F
+END
diff --git a/lib/unicode/In/89.pl b/lib/unicode/In/89.pl
new file mode 100644
index 0000000000..d2c50bbcad
--- /dev/null
+++ b/lib/unicode/In/89.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+10400 1044F
+END
diff --git a/lib/unicode/In/Armenian.pl b/lib/unicode/In/9.pl
index a6d50e3be5..a6d50e3be5 100644
--- a/lib/unicode/In/Armenian.pl
+++ b/lib/unicode/In/9.pl
diff --git a/lib/unicode/In/90.pl b/lib/unicode/In/90.pl
new file mode 100644
index 0000000000..f1073c7392
--- /dev/null
+++ b/lib/unicode/In/90.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+1D000 1D0FF
+END
diff --git a/lib/unicode/In/91.pl b/lib/unicode/In/91.pl
new file mode 100644
index 0000000000..7435889d7c
--- /dev/null
+++ b/lib/unicode/In/91.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+1D100 1D1FF
+END
diff --git a/lib/unicode/In/92.pl b/lib/unicode/In/92.pl
new file mode 100644
index 0000000000..7e40edc3ed
--- /dev/null
+++ b/lib/unicode/In/92.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+1D400 1D7FF
+END
diff --git a/lib/unicode/In/93.pl b/lib/unicode/In/93.pl
new file mode 100644
index 0000000000..931aec3891
--- /dev/null
+++ b/lib/unicode/In/93.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+20000 2A6D6
+END
diff --git a/lib/unicode/In/94.pl b/lib/unicode/In/94.pl
new file mode 100644
index 0000000000..c025148c04
--- /dev/null
+++ b/lib/unicode/In/94.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+2F800 2FA1F
+END
diff --git a/lib/unicode/In/95.pl b/lib/unicode/In/95.pl
new file mode 100644
index 0000000000..495d2d581d
--- /dev/null
+++ b/lib/unicode/In/95.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+E0000 E007F
+END
diff --git a/lib/unicode/In/PrivateUse.pl b/lib/unicode/In/PrivateUse.pl
deleted file mode 100644
index c81b567a74..0000000000
--- a/lib/unicode/In/PrivateUse.pl
+++ /dev/null
@@ -1,6 +0,0 @@
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-E000 F8FF
-END
diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL
index 818785452b..68578b974f 100755
--- a/lib/unicode/mktables.PL
+++ b/lib/unicode/mktables.PL
@@ -231,11 +231,24 @@ mkdir "To", 0755;
# This is not written for speed...
+my %InId;
+my $InId = 0;
+
foreach $file (@todo) {
my ($table, $wanted, $val) = @$file;
next if @ARGV and not grep { $_ eq $table } @ARGV;
- print $table,"\n";
- if ($table =~ /^(Is|In|To)(.*)/) {
+ print $table, "\n";
+ $table =~ s/\W+//g;
+ if ($table =~ /^In(.+)/) {
+ my $id;
+ unless (exists $InId{$1}) {
+ $InId{$1} = $InId++;
+ }
+ $id = $InId{$1};
+ open(OUT, ">In/$id.pl") or die "Can't create In/$id.pl: $!\n";
+ print OUT "# In/$id.pl $1\n";
+ }
+ elsif ($table =~ /^(Is|To)(.+)/) {
open(OUT, ">$1/$2.pl") or die "Can't create $1/$2.pl: $!\n";
}
else {
@@ -257,9 +270,9 @@ END
# Must treat blocks specially.
exit if @ARGV and not grep { $_ eq Block } @ARGV;
-print "Block\n";
+print "Blocks\n";
open(UD, 'Blocks.txt') or die "Can't open Blocks.txt: $!\n";
-open(OUT, ">Block.pl") or die "Can't create Block.pl: $!\n";
+open(OUT, ">Blocks.pl") or die "Can't create Blocks.pl: $!\n";
print OUT <<EOH;
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by $0 from e.g. $UnicodeData.
@@ -273,11 +286,17 @@ while (<UD>) {
next if /^#/;
next if /^$/;
chomp;
- ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]); (.+)/i;
+ ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+)/i;
if ($name) {
print OUT "$code $last $name\n";
- $name =~ s/\s+//g;
- open(BLOCK, ">In/$name.pl");
+ $name =~ s/\W+//g; # squash block name, e.g. "Latin-1 Supplement" -> "Latin1Supplement"
+ my $id;
+ unless (exists $InId{$name}) { # first sighting: allocate the next numeric id
+ $InId{$name} = $InId++;
+ }
+ $id = $InId{$name};
+ open(BLOCK, ">In/$id.pl") or die "Can't create In/$id.pl: $!\n";
+ print OUT "# In/$id.pl $name\n";
print BLOCK <<EOH;
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by $0 from e.g. $UnicodeData.
@@ -295,6 +314,24 @@ END2
print OUT "END\n";
close OUT;
+open(INID, ">In.pl") or die "Can't create In.pl: $!\n";
+# In.pl defines %utf8::In: squashed block name => numeric id of its In/<id>.pl file.
+print INID <<EOH;
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by $0 from e.g. $UnicodeData.
+# Any changes made here will be lost!
+%utf8::In = (
+EOH
+
+# Order doesn't matter to Perl, but list entries in numeric id order for readability.
+foreach my $in (sort { $InId{$a} <=> $InId{$b} } keys %InId) {
+ print INID "'$in' => $InId{$in},\n";
+}
+
+print INID ");\n";
+
+close(INID) or die "Can't close In.pl: $!\n";
+
##################################################
sub proplist {
diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl
index 8649e9e07e..5637d12afa 100644
--- a/lib/utf8_heavy.pl
+++ b/lib/utf8_heavy.pl
@@ -26,7 +26,14 @@ sub SWASHNEW {
while (($caller = caller($i)) eq __PACKAGE__) { $i++ }
my $encoding = $enc{$caller} || "unicode";
(my $file = $type) =~ s!::!/!g;
- $file =~ s#^(I[sn]|To)([A-Z].*)#$1/$2#;
+ if ($file =~ /^In(.+)/) { # In classes: table files are named by numeric id
+ defined %utf8::In || do "$encoding/In.pl"; # load the name => id map on first use
+ if (exists $utf8::In{$1}) {
+ $file = "$encoding/In/$utf8::In{$1}";
+ }
+ } else {
+ $file =~ s#^(Is|To)([A-Z].*)#$1/$2#;
+ }
$list ||= eval { $caller->$type(); }
|| do "$file.pl"
|| do "$encoding/$file.pl"
diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod
index 8ddcdd2b06..12bee5c7a3 100644
--- a/pod/perlunicode.pod
+++ b/pod/perlunicode.pod
@@ -158,9 +158,12 @@ Named Unicode properties and block ranges make be used as character
classes via the new C<\p{}> (matches property) and C<\P{}> (doesn't
match property) constructs. For instance, C<\p{Lu}> matches any
character with the Unicode uppercase property, while C<\p{M}> matches
-any mark character. Single letter properties may omit the brackets, so
-that can be written C<\pM> also. Many predefined character classes are
-available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>.
+any mark character. Single-letter properties may omit the braces,
+so that can also be written C<\pM>. Many predefined character classes
+are available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>. The
+names of the C<In> classes are the official Unicode block names
+with all non-alphanumeric characters removed; for example, the block
+name C<"Latin-1 Supplement"> becomes C<\p{InLatin1Supplement}>.
=item *