diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-04-28 17:18:26 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-04-28 17:18:26 +0000 |
commit | 0060aade95d582c42ee3fa5b8432d566fcd6a4a2 (patch) | |
tree | 7721dfaffe4f678bff14354d99040e1636d5e49c | |
parent | be8ea61add6f44abcbd71ee8ad95b2428f2097a8 (diff) | |
download | perl-0060aade95d582c42ee3fa5b8432d566fcd6a4a2.tar.gz |
Add a level of indirection to the implementation of \p{InFoo}
so that we don't have to have long filenames. (Nothing changes
in the user interface.) The indirection is defined in
the file lib/unicode/In.pl and it is handled in lib/utf8_heavy.pl.
Also rename some of the character classes by removing '-' from
the classnames, and finally rename Block.pl to Blocks.pl.
p4raw-id: //depot/perl@9897
-rw-r--r-- | MANIFEST | 186 | ||||
-rw-r--r-- | lib/unicode/Blocks.pl | 203 | ||||
-rw-r--r-- | lib/unicode/In.pl | 101 | ||||
-rw-r--r-- | lib/unicode/In/0.pl (renamed from lib/unicode/In/BasicLatin.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/1.pl (renamed from lib/unicode/In/Latin-1Supplement.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/10.pl (renamed from lib/unicode/In/Hebrew.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/11.pl (renamed from lib/unicode/In/Arabic.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/12.pl (renamed from lib/unicode/In/Syriac.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/13.pl (renamed from lib/unicode/In/Thaana.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/14.pl (renamed from lib/unicode/In/Devanagari.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/15.pl (renamed from lib/unicode/In/Bengali.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/16.pl (renamed from lib/unicode/In/Gurmukhi.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/17.pl (renamed from lib/unicode/In/Gujarati.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/18.pl (renamed from lib/unicode/In/Oriya.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/19.pl (renamed from lib/unicode/In/Tamil.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/2.pl (renamed from lib/unicode/In/LatinExtended-A.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/20.pl (renamed from lib/unicode/In/Telugu.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/21.pl (renamed from lib/unicode/In/Kannada.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/22.pl (renamed from lib/unicode/In/Malayalam.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/23.pl (renamed from lib/unicode/In/Sinhala.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/24.pl (renamed from lib/unicode/In/Thai.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/25.pl (renamed from lib/unicode/In/Lao.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/26.pl (renamed from lib/unicode/In/Tibetan.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/27.pl (renamed from lib/unicode/In/Myanmar.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/28.pl (renamed from lib/unicode/In/Georgian.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/29.pl (renamed from lib/unicode/In/HangulJamo.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/3.pl (renamed from lib/unicode/In/LatinExtended-B.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/30.pl (renamed from lib/unicode/In/Ethiopic.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/31.pl (renamed from lib/unicode/In/Cherokee.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/32.pl (renamed from lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/33.pl (renamed from lib/unicode/In/Ogham.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/34.pl (renamed from lib/unicode/In/Runic.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/35.pl (renamed from lib/unicode/In/Khmer.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/36.pl (renamed from lib/unicode/In/Mongolian.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/37.pl (renamed from lib/unicode/In/LatinExtendedAdditional.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/38.pl (renamed from lib/unicode/In/GreekExtended.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/39.pl (renamed from lib/unicode/In/GeneralPunctuation.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/4.pl (renamed from lib/unicode/In/IPAExtensions.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/40.pl (renamed from lib/unicode/In/SuperscriptsandSubscripts.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/41.pl (renamed from lib/unicode/In/CurrencySymbols.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/42.pl (renamed from lib/unicode/In/CombiningMarksforSymbols.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/43.pl (renamed from lib/unicode/In/LetterlikeSymbols.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/44.pl (renamed from lib/unicode/In/NumberForms.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/45.pl (renamed from lib/unicode/In/Arrows.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/46.pl (renamed from lib/unicode/In/MathematicalOperators.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/47.pl (renamed from lib/unicode/In/MiscellaneousTechnical.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/48.pl (renamed from lib/unicode/In/ControlPictures.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/49.pl (renamed from lib/unicode/In/OpticalCharacterRecognition.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/5.pl (renamed from lib/unicode/In/SpacingModifierLetters.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/50.pl (renamed from lib/unicode/In/EnclosedAlphanumerics.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/51.pl (renamed from lib/unicode/In/BoxDrawing.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/52.pl (renamed from lib/unicode/In/BlockElements.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/53.pl (renamed from lib/unicode/In/GeometricShapes.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/54.pl (renamed from lib/unicode/In/MiscellaneousSymbols.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/55.pl (renamed from lib/unicode/In/Dingbats.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/56.pl (renamed from lib/unicode/In/BraillePatterns.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/57.pl (renamed from lib/unicode/In/CJKRadicalsSupplement.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/58.pl (renamed from lib/unicode/In/KangxiRadicals.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/59.pl (renamed from lib/unicode/In/IdeographicDescriptionCharacters.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/6.pl (renamed from lib/unicode/In/CombiningDiacriticalMarks.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/60.pl (renamed from lib/unicode/In/CJKSymbolsandPunctuation.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/61.pl (renamed from lib/unicode/In/Hiragana.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/62.pl (renamed from lib/unicode/In/Katakana.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/63.pl (renamed from lib/unicode/In/Bopomofo.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/64.pl (renamed from lib/unicode/In/HangulCompatibilityJamo.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/65.pl (renamed from lib/unicode/In/Kanbun.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/66.pl (renamed from lib/unicode/In/BopomofoExtended.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/67.pl (renamed from lib/unicode/In/EnclosedCJKLettersandMonths.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/68.pl (renamed from lib/unicode/In/CJKCompatibility.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/69.pl (renamed from lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/7.pl (renamed from lib/unicode/In/Greek.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/70.pl (renamed from lib/unicode/In/CJKUnifiedIdeographs.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/71.pl (renamed from lib/unicode/In/YiSyllables.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/72.pl (renamed from lib/unicode/In/YiRadicals.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/73.pl (renamed from lib/unicode/In/HangulSyllables.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/74.pl (renamed from lib/unicode/In/HighSurrogates.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/75.pl (renamed from lib/unicode/In/HighPrivateUseSurrogates.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/76.pl (renamed from lib/unicode/In/LowSurrogates.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/77.pl | 6 | ||||
-rw-r--r-- | lib/unicode/In/78.pl (renamed from lib/unicode/In/CJKCompatibilityIdeographs.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/79.pl (renamed from lib/unicode/In/AlphabeticPresentationForms.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/8.pl (renamed from lib/unicode/In/Cyrillic.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/80.pl (renamed from lib/unicode/In/ArabicPresentationForms-A.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/81.pl (renamed from lib/unicode/In/CombiningHalfMarks.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/82.pl (renamed from lib/unicode/In/CJKCompatibilityForms.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/83.pl (renamed from lib/unicode/In/SmallFormVariants.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/84.pl (renamed from lib/unicode/In/ArabicPresentationForms-B.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/85.pl (renamed from lib/unicode/In/Specials.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/86.pl (renamed from lib/unicode/In/HalfwidthandFullwidthForms.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/87.pl (renamed from lib/unicode/Block.pl) | 1 | ||||
-rw-r--r-- | lib/unicode/In/88.pl | 6 | ||||
-rw-r--r-- | lib/unicode/In/89.pl | 6 | ||||
-rw-r--r-- | lib/unicode/In/9.pl (renamed from lib/unicode/In/Armenian.pl) | 0 | ||||
-rw-r--r-- | lib/unicode/In/90.pl | 6 | ||||
-rw-r--r-- | lib/unicode/In/91.pl | 6 | ||||
-rw-r--r-- | lib/unicode/In/92.pl | 6 | ||||
-rw-r--r-- | lib/unicode/In/93.pl | 6 | ||||
-rw-r--r-- | lib/unicode/In/94.pl | 6 | ||||
-rw-r--r-- | lib/unicode/In/95.pl | 6 | ||||
-rw-r--r-- | lib/unicode/In/PrivateUse.pl | 6 | ||||
-rwxr-xr-x | lib/unicode/mktables.PL | 51 | ||||
-rw-r--r-- | lib/utf8_heavy.pl | 9 | ||||
-rw-r--r-- | pod/perlunicode.pod | 9 |
103 files changed, 515 insertions, 105 deletions
@@ -888,7 +888,7 @@ lib/unicode/ArabLnkGrp.pl Unicode character database lib/unicode/ArabShap.txt Unicode character database lib/unicode/BidiMirr.txt Unicode character database lib/unicode/Bidirectional.pl Unicode character database -lib/unicode/Block.pl Unicode character database +lib/unicode/Blocks.pl Unicode character database lib/unicode/Blocks.txt Unicode character database lib/unicode/CaseFold.txt Unicode character database lib/unicode/Category.pl Unicode character database @@ -896,93 +896,103 @@ lib/unicode/CombiningClass.pl Unicode character database lib/unicode/CompExcl.txt Unicode character database lib/unicode/Decomposition.pl Unicode character database lib/unicode/EAWidth.txt Unicode character database -lib/unicode/In/AlphabeticPresentationForms.pl Unicode character database -lib/unicode/In/Arabic.pl Unicode character database -lib/unicode/In/ArabicPresentationForms-A.pl Unicode character database -lib/unicode/In/ArabicPresentationForms-B.pl Unicode character database -lib/unicode/In/Armenian.pl Unicode character database -lib/unicode/In/Arrows.pl Unicode character database -lib/unicode/In/BasicLatin.pl Unicode character database -lib/unicode/In/Bengali.pl Unicode character database -lib/unicode/In/BlockElements.pl Unicode character database -lib/unicode/In/Bopomofo.pl Unicode character database -lib/unicode/In/BopomofoExtended.pl Unicode character database -lib/unicode/In/BoxDrawing.pl Unicode character database -lib/unicode/In/BraillePatterns.pl Unicode character database -lib/unicode/In/CJKCompatibility.pl Unicode character database -lib/unicode/In/CJKCompatibilityForms.pl Unicode character database -lib/unicode/In/CJKCompatibilityIdeographs.pl Unicode character database -lib/unicode/In/CJKRadicalsSupplement.pl Unicode character database -lib/unicode/In/CJKSymbolsandPunctuation.pl Unicode character database -lib/unicode/In/CJKUnifiedIdeographs.pl Unicode character database -lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl Unicode character database 
-lib/unicode/In/Cherokee.pl Unicode character database -lib/unicode/In/CombiningDiacriticalMarks.pl Unicode character database -lib/unicode/In/CombiningHalfMarks.pl Unicode character database -lib/unicode/In/CombiningMarksforSymbols.pl Unicode character database -lib/unicode/In/ControlPictures.pl Unicode character database -lib/unicode/In/CurrencySymbols.pl Unicode character database -lib/unicode/In/Cyrillic.pl Unicode character database -lib/unicode/In/Devanagari.pl Unicode character database -lib/unicode/In/Dingbats.pl Unicode character database -lib/unicode/In/EnclosedAlphanumerics.pl Unicode character database -lib/unicode/In/EnclosedCJKLettersandMonths.pl Unicode character database -lib/unicode/In/Ethiopic.pl Unicode character database -lib/unicode/In/GeneralPunctuation.pl Unicode character database -lib/unicode/In/GeometricShapes.pl Unicode character database -lib/unicode/In/Georgian.pl Unicode character database -lib/unicode/In/Greek.pl Unicode character database -lib/unicode/In/GreekExtended.pl Unicode character database -lib/unicode/In/Gujarati.pl Unicode character database -lib/unicode/In/Gurmukhi.pl Unicode character database -lib/unicode/In/HalfwidthandFullwidthForms.pl Unicode character database -lib/unicode/In/HangulCompatibilityJamo.pl Unicode character database -lib/unicode/In/HangulJamo.pl Unicode character database -lib/unicode/In/HangulSyllables.pl Unicode character database -lib/unicode/In/Hebrew.pl Unicode character database -lib/unicode/In/HighPrivateUseSurrogates.pl Unicode character database -lib/unicode/In/HighSurrogates.pl Unicode character database -lib/unicode/In/Hiragana.pl Unicode character database -lib/unicode/In/IPAExtensions.pl Unicode character database -lib/unicode/In/IdeographicDescriptionCharacters.pl Unicode character database -lib/unicode/In/Kanbun.pl Unicode character database -lib/unicode/In/KangxiRadicals.pl Unicode character database -lib/unicode/In/Kannada.pl Unicode character database -lib/unicode/In/Katakana.pl Unicode 
character database -lib/unicode/In/Khmer.pl Unicode character database -lib/unicode/In/Lao.pl Unicode character database -lib/unicode/In/Latin-1Supplement.pl Unicode character database -lib/unicode/In/LatinExtended-A.pl Unicode character database -lib/unicode/In/LatinExtended-B.pl Unicode character database -lib/unicode/In/LatinExtendedAdditional.pl Unicode character database -lib/unicode/In/LetterlikeSymbols.pl Unicode character database -lib/unicode/In/LowSurrogates.pl Unicode character database -lib/unicode/In/Malayalam.pl Unicode character database -lib/unicode/In/MathematicalOperators.pl Unicode character database -lib/unicode/In/MiscellaneousSymbols.pl Unicode character database -lib/unicode/In/MiscellaneousTechnical.pl Unicode character database -lib/unicode/In/Mongolian.pl Unicode character database -lib/unicode/In/Myanmar.pl Unicode character database -lib/unicode/In/NumberForms.pl Unicode character database -lib/unicode/In/Ogham.pl Unicode character database -lib/unicode/In/OpticalCharacterRecognition.pl Unicode character database -lib/unicode/In/Oriya.pl Unicode character database -lib/unicode/In/PrivateUse.pl Unicode character database -lib/unicode/In/Runic.pl Unicode character database -lib/unicode/In/Sinhala.pl Unicode character database -lib/unicode/In/SmallFormVariants.pl Unicode character database -lib/unicode/In/SpacingModifierLetters.pl Unicode character database -lib/unicode/In/Specials.pl Unicode character database -lib/unicode/In/SuperscriptsandSubscripts.pl Unicode character database -lib/unicode/In/Syriac.pl Unicode character database -lib/unicode/In/Tamil.pl Unicode character database -lib/unicode/In/Telugu.pl Unicode character database -lib/unicode/In/Thaana.pl Unicode character database -lib/unicode/In/Thai.pl Unicode character database -lib/unicode/In/Tibetan.pl Unicode character database -lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl Unicode character database -lib/unicode/In/YiRadicals.pl Unicode character database 
-lib/unicode/In/YiSyllables.pl Unicode character database +lib/unicode/In.pl Unicode character database +lib/unicode/In/0.pl Unicode character database +lib/unicode/In/1.pl Unicode character database +lib/unicode/In/2.pl Unicode character database +lib/unicode/In/3.pl Unicode character database +lib/unicode/In/4.pl Unicode character database +lib/unicode/In/5.pl Unicode character database +lib/unicode/In/6.pl Unicode character database +lib/unicode/In/7.pl Unicode character database +lib/unicode/In/8.pl Unicode character database +lib/unicode/In/9.pl Unicode character database +lib/unicode/In/10.pl Unicode character database +lib/unicode/In/11.pl Unicode character database +lib/unicode/In/12.pl Unicode character database +lib/unicode/In/13.pl Unicode character database +lib/unicode/In/14.pl Unicode character database +lib/unicode/In/15.pl Unicode character database +lib/unicode/In/16.pl Unicode character database +lib/unicode/In/17.pl Unicode character database +lib/unicode/In/18.pl Unicode character database +lib/unicode/In/19.pl Unicode character database +lib/unicode/In/20.pl Unicode character database +lib/unicode/In/21.pl Unicode character database +lib/unicode/In/22.pl Unicode character database +lib/unicode/In/23.pl Unicode character database +lib/unicode/In/24.pl Unicode character database +lib/unicode/In/25.pl Unicode character database +lib/unicode/In/26.pl Unicode character database +lib/unicode/In/27.pl Unicode character database +lib/unicode/In/28.pl Unicode character database +lib/unicode/In/29.pl Unicode character database +lib/unicode/In/30.pl Unicode character database +lib/unicode/In/31.pl Unicode character database +lib/unicode/In/32.pl Unicode character database +lib/unicode/In/33.pl Unicode character database +lib/unicode/In/34.pl Unicode character database +lib/unicode/In/35.pl Unicode character database +lib/unicode/In/36.pl Unicode character database +lib/unicode/In/37.pl Unicode character database +lib/unicode/In/38.pl Unicode character 
database +lib/unicode/In/39.pl Unicode character database +lib/unicode/In/40.pl Unicode character database +lib/unicode/In/41.pl Unicode character database +lib/unicode/In/42.pl Unicode character database +lib/unicode/In/43.pl Unicode character database +lib/unicode/In/44.pl Unicode character database +lib/unicode/In/45.pl Unicode character database +lib/unicode/In/46.pl Unicode character database +lib/unicode/In/47.pl Unicode character database +lib/unicode/In/48.pl Unicode character database +lib/unicode/In/49.pl Unicode character database +lib/unicode/In/50.pl Unicode character database +lib/unicode/In/51.pl Unicode character database +lib/unicode/In/52.pl Unicode character database +lib/unicode/In/53.pl Unicode character database +lib/unicode/In/54.pl Unicode character database +lib/unicode/In/55.pl Unicode character database +lib/unicode/In/56.pl Unicode character database +lib/unicode/In/57.pl Unicode character database +lib/unicode/In/58.pl Unicode character database +lib/unicode/In/59.pl Unicode character database +lib/unicode/In/60.pl Unicode character database +lib/unicode/In/61.pl Unicode character database +lib/unicode/In/62.pl Unicode character database +lib/unicode/In/63.pl Unicode character database +lib/unicode/In/64.pl Unicode character database +lib/unicode/In/65.pl Unicode character database +lib/unicode/In/66.pl Unicode character database +lib/unicode/In/67.pl Unicode character database +lib/unicode/In/68.pl Unicode character database +lib/unicode/In/69.pl Unicode character database +lib/unicode/In/70.pl Unicode character database +lib/unicode/In/71.pl Unicode character database +lib/unicode/In/72.pl Unicode character database +lib/unicode/In/73.pl Unicode character database +lib/unicode/In/74.pl Unicode character database +lib/unicode/In/75.pl Unicode character database +lib/unicode/In/76.pl Unicode character database +lib/unicode/In/77.pl Unicode character database +lib/unicode/In/78.pl Unicode character database +lib/unicode/In/79.pl Unicode 
character database +lib/unicode/In/80.pl Unicode character database +lib/unicode/In/81.pl Unicode character database +lib/unicode/In/82.pl Unicode character database +lib/unicode/In/83.pl Unicode character database +lib/unicode/In/84.pl Unicode character database +lib/unicode/In/85.pl Unicode character database +lib/unicode/In/86.pl Unicode character database +lib/unicode/In/87.pl Unicode character database +lib/unicode/In/88.pl Unicode character database +lib/unicode/In/89.pl Unicode character database +lib/unicode/In/90.pl Unicode character database +lib/unicode/In/91.pl Unicode character database +lib/unicode/In/92.pl Unicode character database +lib/unicode/In/93.pl Unicode character database +lib/unicode/In/94.pl Unicode character database +lib/unicode/In/95.pl Unicode character database lib/unicode/Index.txt Unicode character database lib/unicode/Is/ASCII.pl Unicode character database lib/unicode/Is/Alnum.pl Unicode character database diff --git a/lib/unicode/Blocks.pl b/lib/unicode/Blocks.pl new file mode 100644 index 0000000000..ef60058ba3 --- /dev/null +++ b/lib/unicode/Blocks.pl @@ -0,0 +1,203 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! 
+return <<'END'; +0000 007F Basic Latin +# In/0.pl BasicLatin +0080 00FF Latin-1 Supplement +# In/1.pl Latin1Supplement +0100 017F Latin Extended-A +# In/2.pl LatinExtendedA +0180 024F Latin Extended-B +# In/3.pl LatinExtendedB +0250 02AF IPA Extensions +# In/4.pl IPAExtensions +02B0 02FF Spacing Modifier Letters +# In/5.pl SpacingModifierLetters +0300 036F Combining Diacritical Marks +# In/6.pl CombiningDiacriticalMarks +0370 03FF Greek +# In/7.pl Greek +0400 04FF Cyrillic +# In/8.pl Cyrillic +0530 058F Armenian +# In/9.pl Armenian +0590 05FF Hebrew +# In/10.pl Hebrew +0600 06FF Arabic +# In/11.pl Arabic +0700 074F Syriac +# In/12.pl Syriac +0780 07BF Thaana +# In/13.pl Thaana +0900 097F Devanagari +# In/14.pl Devanagari +0980 09FF Bengali +# In/15.pl Bengali +0A00 0A7F Gurmukhi +# In/16.pl Gurmukhi +0A80 0AFF Gujarati +# In/17.pl Gujarati +0B00 0B7F Oriya +# In/18.pl Oriya +0B80 0BFF Tamil +# In/19.pl Tamil +0C00 0C7F Telugu +# In/20.pl Telugu +0C80 0CFF Kannada +# In/21.pl Kannada +0D00 0D7F Malayalam +# In/22.pl Malayalam +0D80 0DFF Sinhala +# In/23.pl Sinhala +0E00 0E7F Thai +# In/24.pl Thai +0E80 0EFF Lao +# In/25.pl Lao +0F00 0FFF Tibetan +# In/26.pl Tibetan +1000 109F Myanmar +# In/27.pl Myanmar +10A0 10FF Georgian +# In/28.pl Georgian +1100 11FF Hangul Jamo +# In/29.pl HangulJamo +1200 137F Ethiopic +# In/30.pl Ethiopic +13A0 13FF Cherokee +# In/31.pl Cherokee +1400 167F Unified Canadian Aboriginal Syllabics +# In/32.pl UnifiedCanadianAboriginalSyllabics +1680 169F Ogham +# In/33.pl Ogham +16A0 16FF Runic +# In/34.pl Runic +1780 17FF Khmer +# In/35.pl Khmer +1800 18AF Mongolian +# In/36.pl Mongolian +1E00 1EFF Latin Extended Additional +# In/37.pl LatinExtendedAdditional +1F00 1FFF Greek Extended +# In/38.pl GreekExtended +2000 206F General Punctuation +# In/39.pl GeneralPunctuation +2070 209F Superscripts and Subscripts +# In/40.pl SuperscriptsandSubscripts +20A0 20CF Currency Symbols +# In/41.pl CurrencySymbols +20D0 20FF Combining Marks for Symbols +# 
In/42.pl CombiningMarksforSymbols +2100 214F Letterlike Symbols +# In/43.pl LetterlikeSymbols +2150 218F Number Forms +# In/44.pl NumberForms +2190 21FF Arrows +# In/45.pl Arrows +2200 22FF Mathematical Operators +# In/46.pl MathematicalOperators +2300 23FF Miscellaneous Technical +# In/47.pl MiscellaneousTechnical +2400 243F Control Pictures +# In/48.pl ControlPictures +2440 245F Optical Character Recognition +# In/49.pl OpticalCharacterRecognition +2460 24FF Enclosed Alphanumerics +# In/50.pl EnclosedAlphanumerics +2500 257F Box Drawing +# In/51.pl BoxDrawing +2580 259F Block Elements +# In/52.pl BlockElements +25A0 25FF Geometric Shapes +# In/53.pl GeometricShapes +2600 26FF Miscellaneous Symbols +# In/54.pl MiscellaneousSymbols +2700 27BF Dingbats +# In/55.pl Dingbats +2800 28FF Braille Patterns +# In/56.pl BraillePatterns +2E80 2EFF CJK Radicals Supplement +# In/57.pl CJKRadicalsSupplement +2F00 2FDF Kangxi Radicals +# In/58.pl KangxiRadicals +2FF0 2FFF Ideographic Description Characters +# In/59.pl IdeographicDescriptionCharacters +3000 303F CJK Symbols and Punctuation +# In/60.pl CJKSymbolsandPunctuation +3040 309F Hiragana +# In/61.pl Hiragana +30A0 30FF Katakana +# In/62.pl Katakana +3100 312F Bopomofo +# In/63.pl Bopomofo +3130 318F Hangul Compatibility Jamo +# In/64.pl HangulCompatibilityJamo +3190 319F Kanbun +# In/65.pl Kanbun +31A0 31BF Bopomofo Extended +# In/66.pl BopomofoExtended +3200 32FF Enclosed CJK Letters and Months +# In/67.pl EnclosedCJKLettersandMonths +3300 33FF CJK Compatibility +# In/68.pl CJKCompatibility +3400 4DB5 CJK Unified Ideographs Extension A +# In/69.pl CJKUnifiedIdeographsExtensionA +4E00 9FFF CJK Unified Ideographs +# In/70.pl CJKUnifiedIdeographs +A000 A48F Yi Syllables +# In/71.pl YiSyllables +A490 A4CF Yi Radicals +# In/72.pl YiRadicals +AC00 D7A3 Hangul Syllables +# In/73.pl HangulSyllables +D800 DB7F High Surrogates +# In/74.pl HighSurrogates +DB80 DBFF High Private Use Surrogates +# In/75.pl HighPrivateUseSurrogates 
+DC00 DFFF Low Surrogates +# In/76.pl LowSurrogates +E000 F8FF Private Use +# In/77.pl PrivateUse +F900 FAFF CJK Compatibility Ideographs +# In/78.pl CJKCompatibilityIdeographs +FB00 FB4F Alphabetic Presentation Forms +# In/79.pl AlphabeticPresentationForms +FB50 FDFF Arabic Presentation Forms-A +# In/80.pl ArabicPresentationFormsA +FE20 FE2F Combining Half Marks +# In/81.pl CombiningHalfMarks +FE30 FE4F CJK Compatibility Forms +# In/82.pl CJKCompatibilityForms +FE50 FE6F Small Form Variants +# In/83.pl SmallFormVariants +FE70 FEFE Arabic Presentation Forms-B +# In/84.pl ArabicPresentationFormsB +FEFF FEFF Specials +# In/85.pl Specials +FF00 FFEF Halfwidth and Fullwidth Forms +# In/86.pl HalfwidthandFullwidthForms +FFF0 FFFD Specials +# In/85.pl Specials +10300 1032F Old Italic +# In/87.pl OldItalic +10330 1034F Gothic +# In/88.pl Gothic +10400 1044F Deseret +# In/89.pl Deseret +1D000 1D0FF Byzantine Musical Symbols +# In/90.pl ByzantineMusicalSymbols +1D100 1D1FF Musical Symbols +# In/91.pl MusicalSymbols +1D400 1D7FF Mathematical Alphanumeric Symbols +# In/92.pl MathematicalAlphanumericSymbols +20000 2A6D6 CJK Unified Ideographs Extension B +# In/93.pl CJKUnifiedIdeographsExtensionB +2F800 2FA1F CJK Compatibility Ideographs Supplement +# In/94.pl CJKCompatibilityIdeographsSupplement +E0000 E007F Tags +# In/95.pl Tags +F0000 FFFFD Private Use +# In/77.pl PrivateUse +100000 10FFFD Private Use +# In/77.pl PrivateUse +END diff --git a/lib/unicode/In.pl b/lib/unicode/In.pl new file mode 100644 index 0000000000..eefec27310 --- /dev/null +++ b/lib/unicode/In.pl @@ -0,0 +1,101 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! 
+%utf8::In = ( +'BasicLatin' => 0, +'Latin1Supplement' => 1, +'Hebrew' => 10, +'Arabic' => 11, +'Syriac' => 12, +'Thaana' => 13, +'Devanagari' => 14, +'Bengali' => 15, +'Gurmukhi' => 16, +'Gujarati' => 17, +'Oriya' => 18, +'Tamil' => 19, +'LatinExtendedA' => 2, +'Telugu' => 20, +'Kannada' => 21, +'Malayalam' => 22, +'Sinhala' => 23, +'Thai' => 24, +'Lao' => 25, +'Tibetan' => 26, +'Myanmar' => 27, +'Georgian' => 28, +'HangulJamo' => 29, +'LatinExtendedB' => 3, +'Ethiopic' => 30, +'Cherokee' => 31, +'UnifiedCanadianAboriginalSyllabics' => 32, +'Ogham' => 33, +'Runic' => 34, +'Khmer' => 35, +'Mongolian' => 36, +'LatinExtendedAdditional' => 37, +'GreekExtended' => 38, +'GeneralPunctuation' => 39, +'IPAExtensions' => 4, +'SuperscriptsandSubscripts' => 40, +'CurrencySymbols' => 41, +'CombiningMarksforSymbols' => 42, +'LetterlikeSymbols' => 43, +'NumberForms' => 44, +'Arrows' => 45, +'MathematicalOperators' => 46, +'MiscellaneousTechnical' => 47, +'ControlPictures' => 48, +'OpticalCharacterRecognition' => 49, +'SpacingModifierLetters' => 5, +'EnclosedAlphanumerics' => 50, +'BoxDrawing' => 51, +'BlockElements' => 52, +'GeometricShapes' => 53, +'MiscellaneousSymbols' => 54, +'Dingbats' => 55, +'BraillePatterns' => 56, +'CJKRadicalsSupplement' => 57, +'KangxiRadicals' => 58, +'IdeographicDescriptionCharacters' => 59, +'CombiningDiacriticalMarks' => 6, +'CJKSymbolsandPunctuation' => 60, +'Hiragana' => 61, +'Katakana' => 62, +'Bopomofo' => 63, +'HangulCompatibilityJamo' => 64, +'Kanbun' => 65, +'BopomofoExtended' => 66, +'EnclosedCJKLettersandMonths' => 67, +'CJKCompatibility' => 68, +'CJKUnifiedIdeographsExtensionA' => 69, +'Greek' => 7, +'CJKUnifiedIdeographs' => 70, +'YiSyllables' => 71, +'YiRadicals' => 72, +'HangulSyllables' => 73, +'HighSurrogates' => 74, +'HighPrivateUseSurrogates' => 75, +'LowSurrogates' => 76, +'PrivateUse' => 77, +'CJKCompatibilityIdeographs' => 78, +'AlphabeticPresentationForms' => 79, +'Cyrillic' => 8, +'ArabicPresentationFormsA' => 80, 
+'CombiningHalfMarks' => 81, +'CJKCompatibilityForms' => 82, +'SmallFormVariants' => 83, +'ArabicPresentationFormsB' => 84, +'Specials' => 85, +'HalfwidthandFullwidthForms' => 86, +'OldItalic' => 87, +'Gothic' => 88, +'Deseret' => 89, +'Armenian' => 9, +'ByzantineMusicalSymbols' => 90, +'MusicalSymbols' => 91, +'MathematicalAlphanumericSymbols' => 92, +'CJKUnifiedIdeographsExtensionB' => 93, +'CJKCompatibilityIdeographsSupplement' => 94, +'Tags' => 95, +); diff --git a/lib/unicode/In/BasicLatin.pl b/lib/unicode/In/0.pl index 475c1dfed0..475c1dfed0 100644 --- a/lib/unicode/In/BasicLatin.pl +++ b/lib/unicode/In/0.pl diff --git a/lib/unicode/In/Latin-1Supplement.pl b/lib/unicode/In/1.pl index 5a5aa0e18c..5a5aa0e18c 100644 --- a/lib/unicode/In/Latin-1Supplement.pl +++ b/lib/unicode/In/1.pl diff --git a/lib/unicode/In/Hebrew.pl b/lib/unicode/In/10.pl index f1d866c049..f1d866c049 100644 --- a/lib/unicode/In/Hebrew.pl +++ b/lib/unicode/In/10.pl diff --git a/lib/unicode/In/Arabic.pl b/lib/unicode/In/11.pl index 7546a743b5..7546a743b5 100644 --- a/lib/unicode/In/Arabic.pl +++ b/lib/unicode/In/11.pl diff --git a/lib/unicode/In/Syriac.pl b/lib/unicode/In/12.pl index e5247ad937..e5247ad937 100644 --- a/lib/unicode/In/Syriac.pl +++ b/lib/unicode/In/12.pl diff --git a/lib/unicode/In/Thaana.pl b/lib/unicode/In/13.pl index 5bda401f7b..5bda401f7b 100644 --- a/lib/unicode/In/Thaana.pl +++ b/lib/unicode/In/13.pl diff --git a/lib/unicode/In/Devanagari.pl b/lib/unicode/In/14.pl index a20b68d031..a20b68d031 100644 --- a/lib/unicode/In/Devanagari.pl +++ b/lib/unicode/In/14.pl diff --git a/lib/unicode/In/Bengali.pl b/lib/unicode/In/15.pl index 306f653dbc..306f653dbc 100644 --- a/lib/unicode/In/Bengali.pl +++ b/lib/unicode/In/15.pl diff --git a/lib/unicode/In/Gurmukhi.pl b/lib/unicode/In/16.pl index d37d4849ca..d37d4849ca 100644 --- a/lib/unicode/In/Gurmukhi.pl +++ b/lib/unicode/In/16.pl diff --git a/lib/unicode/In/Gujarati.pl b/lib/unicode/In/17.pl index 65d853b314..65d853b314 100644 --- 
a/lib/unicode/In/Gujarati.pl +++ b/lib/unicode/In/17.pl diff --git a/lib/unicode/In/Oriya.pl b/lib/unicode/In/18.pl index 14e1027fb6..14e1027fb6 100644 --- a/lib/unicode/In/Oriya.pl +++ b/lib/unicode/In/18.pl diff --git a/lib/unicode/In/Tamil.pl b/lib/unicode/In/19.pl index a28ba3d909..a28ba3d909 100644 --- a/lib/unicode/In/Tamil.pl +++ b/lib/unicode/In/19.pl diff --git a/lib/unicode/In/LatinExtended-A.pl b/lib/unicode/In/2.pl index 0f6acf9853..0f6acf9853 100644 --- a/lib/unicode/In/LatinExtended-A.pl +++ b/lib/unicode/In/2.pl diff --git a/lib/unicode/In/Telugu.pl b/lib/unicode/In/20.pl index aff6cc93f4..aff6cc93f4 100644 --- a/lib/unicode/In/Telugu.pl +++ b/lib/unicode/In/20.pl diff --git a/lib/unicode/In/Kannada.pl b/lib/unicode/In/21.pl index 41e05bdc3b..41e05bdc3b 100644 --- a/lib/unicode/In/Kannada.pl +++ b/lib/unicode/In/21.pl diff --git a/lib/unicode/In/Malayalam.pl b/lib/unicode/In/22.pl index b42bbeea8d..b42bbeea8d 100644 --- a/lib/unicode/In/Malayalam.pl +++ b/lib/unicode/In/22.pl diff --git a/lib/unicode/In/Sinhala.pl b/lib/unicode/In/23.pl index 00da6d144f..00da6d144f 100644 --- a/lib/unicode/In/Sinhala.pl +++ b/lib/unicode/In/23.pl diff --git a/lib/unicode/In/Thai.pl b/lib/unicode/In/24.pl index 2fa00eb135..2fa00eb135 100644 --- a/lib/unicode/In/Thai.pl +++ b/lib/unicode/In/24.pl diff --git a/lib/unicode/In/Lao.pl b/lib/unicode/In/25.pl index 5fd607c08f..5fd607c08f 100644 --- a/lib/unicode/In/Lao.pl +++ b/lib/unicode/In/25.pl diff --git a/lib/unicode/In/Tibetan.pl b/lib/unicode/In/26.pl index 3ae5e6248d..3ae5e6248d 100644 --- a/lib/unicode/In/Tibetan.pl +++ b/lib/unicode/In/26.pl diff --git a/lib/unicode/In/Myanmar.pl b/lib/unicode/In/27.pl index ecc3448361..ecc3448361 100644 --- a/lib/unicode/In/Myanmar.pl +++ b/lib/unicode/In/27.pl diff --git a/lib/unicode/In/Georgian.pl b/lib/unicode/In/28.pl index 73a8818c71..73a8818c71 100644 --- a/lib/unicode/In/Georgian.pl +++ b/lib/unicode/In/28.pl diff --git a/lib/unicode/In/HangulJamo.pl 
b/lib/unicode/In/29.pl index 692be7d813..692be7d813 100644 --- a/lib/unicode/In/HangulJamo.pl +++ b/lib/unicode/In/29.pl diff --git a/lib/unicode/In/LatinExtended-B.pl b/lib/unicode/In/3.pl index 68f093234e..68f093234e 100644 --- a/lib/unicode/In/LatinExtended-B.pl +++ b/lib/unicode/In/3.pl diff --git a/lib/unicode/In/Ethiopic.pl b/lib/unicode/In/30.pl index ad4776df99..ad4776df99 100644 --- a/lib/unicode/In/Ethiopic.pl +++ b/lib/unicode/In/30.pl diff --git a/lib/unicode/In/Cherokee.pl b/lib/unicode/In/31.pl index f40dfa2be0..f40dfa2be0 100644 --- a/lib/unicode/In/Cherokee.pl +++ b/lib/unicode/In/31.pl diff --git a/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl b/lib/unicode/In/32.pl index 7318008076..7318008076 100644 --- a/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl +++ b/lib/unicode/In/32.pl diff --git a/lib/unicode/In/Ogham.pl b/lib/unicode/In/33.pl index 5d7bd970e8..5d7bd970e8 100644 --- a/lib/unicode/In/Ogham.pl +++ b/lib/unicode/In/33.pl diff --git a/lib/unicode/In/Runic.pl b/lib/unicode/In/34.pl index d404cb6cfb..d404cb6cfb 100644 --- a/lib/unicode/In/Runic.pl +++ b/lib/unicode/In/34.pl diff --git a/lib/unicode/In/Khmer.pl b/lib/unicode/In/35.pl index 2b0b198216..2b0b198216 100644 --- a/lib/unicode/In/Khmer.pl +++ b/lib/unicode/In/35.pl diff --git a/lib/unicode/In/Mongolian.pl b/lib/unicode/In/36.pl index 06526c64d9..06526c64d9 100644 --- a/lib/unicode/In/Mongolian.pl +++ b/lib/unicode/In/36.pl diff --git a/lib/unicode/In/LatinExtendedAdditional.pl b/lib/unicode/In/37.pl index c288810ca2..c288810ca2 100644 --- a/lib/unicode/In/LatinExtendedAdditional.pl +++ b/lib/unicode/In/37.pl diff --git a/lib/unicode/In/GreekExtended.pl b/lib/unicode/In/38.pl index 74cd2c88e0..74cd2c88e0 100644 --- a/lib/unicode/In/GreekExtended.pl +++ b/lib/unicode/In/38.pl diff --git a/lib/unicode/In/GeneralPunctuation.pl b/lib/unicode/In/39.pl index b9b0e7efaa..b9b0e7efaa 100644 --- a/lib/unicode/In/GeneralPunctuation.pl +++ b/lib/unicode/In/39.pl diff --git 
a/lib/unicode/In/IPAExtensions.pl b/lib/unicode/In/4.pl index f6e9454fe0..f6e9454fe0 100644 --- a/lib/unicode/In/IPAExtensions.pl +++ b/lib/unicode/In/4.pl diff --git a/lib/unicode/In/SuperscriptsandSubscripts.pl b/lib/unicode/In/40.pl index 2e36ac331c..2e36ac331c 100644 --- a/lib/unicode/In/SuperscriptsandSubscripts.pl +++ b/lib/unicode/In/40.pl diff --git a/lib/unicode/In/CurrencySymbols.pl b/lib/unicode/In/41.pl index 12c67371cc..12c67371cc 100644 --- a/lib/unicode/In/CurrencySymbols.pl +++ b/lib/unicode/In/41.pl diff --git a/lib/unicode/In/CombiningMarksforSymbols.pl b/lib/unicode/In/42.pl index 2d58a56712..2d58a56712 100644 --- a/lib/unicode/In/CombiningMarksforSymbols.pl +++ b/lib/unicode/In/42.pl diff --git a/lib/unicode/In/LetterlikeSymbols.pl b/lib/unicode/In/43.pl index c735821edc..c735821edc 100644 --- a/lib/unicode/In/LetterlikeSymbols.pl +++ b/lib/unicode/In/43.pl diff --git a/lib/unicode/In/NumberForms.pl b/lib/unicode/In/44.pl index a1949a194d..a1949a194d 100644 --- a/lib/unicode/In/NumberForms.pl +++ b/lib/unicode/In/44.pl diff --git a/lib/unicode/In/Arrows.pl b/lib/unicode/In/45.pl index 799f739085..799f739085 100644 --- a/lib/unicode/In/Arrows.pl +++ b/lib/unicode/In/45.pl diff --git a/lib/unicode/In/MathematicalOperators.pl b/lib/unicode/In/46.pl index 8bc8295cc5..8bc8295cc5 100644 --- a/lib/unicode/In/MathematicalOperators.pl +++ b/lib/unicode/In/46.pl diff --git a/lib/unicode/In/MiscellaneousTechnical.pl b/lib/unicode/In/47.pl index 67867951d6..67867951d6 100644 --- a/lib/unicode/In/MiscellaneousTechnical.pl +++ b/lib/unicode/In/47.pl diff --git a/lib/unicode/In/ControlPictures.pl b/lib/unicode/In/48.pl index 7aad2fcacf..7aad2fcacf 100644 --- a/lib/unicode/In/ControlPictures.pl +++ b/lib/unicode/In/48.pl diff --git a/lib/unicode/In/OpticalCharacterRecognition.pl b/lib/unicode/In/49.pl index c7cecd02da..c7cecd02da 100644 --- a/lib/unicode/In/OpticalCharacterRecognition.pl +++ b/lib/unicode/In/49.pl diff --git 
a/lib/unicode/In/SpacingModifierLetters.pl b/lib/unicode/In/5.pl index a242e0207a..a242e0207a 100644 --- a/lib/unicode/In/SpacingModifierLetters.pl +++ b/lib/unicode/In/5.pl diff --git a/lib/unicode/In/EnclosedAlphanumerics.pl b/lib/unicode/In/50.pl index 7b1b778af0..7b1b778af0 100644 --- a/lib/unicode/In/EnclosedAlphanumerics.pl +++ b/lib/unicode/In/50.pl diff --git a/lib/unicode/In/BoxDrawing.pl b/lib/unicode/In/51.pl index 4d446863fe..4d446863fe 100644 --- a/lib/unicode/In/BoxDrawing.pl +++ b/lib/unicode/In/51.pl diff --git a/lib/unicode/In/BlockElements.pl b/lib/unicode/In/52.pl index 6135c93e90..6135c93e90 100644 --- a/lib/unicode/In/BlockElements.pl +++ b/lib/unicode/In/52.pl diff --git a/lib/unicode/In/GeometricShapes.pl b/lib/unicode/In/53.pl index 855d98ebff..855d98ebff 100644 --- a/lib/unicode/In/GeometricShapes.pl +++ b/lib/unicode/In/53.pl diff --git a/lib/unicode/In/MiscellaneousSymbols.pl b/lib/unicode/In/54.pl index 0949bc2b55..0949bc2b55 100644 --- a/lib/unicode/In/MiscellaneousSymbols.pl +++ b/lib/unicode/In/54.pl diff --git a/lib/unicode/In/Dingbats.pl b/lib/unicode/In/55.pl index 3013f73c75..3013f73c75 100644 --- a/lib/unicode/In/Dingbats.pl +++ b/lib/unicode/In/55.pl diff --git a/lib/unicode/In/BraillePatterns.pl b/lib/unicode/In/56.pl index d785c31676..d785c31676 100644 --- a/lib/unicode/In/BraillePatterns.pl +++ b/lib/unicode/In/56.pl diff --git a/lib/unicode/In/CJKRadicalsSupplement.pl b/lib/unicode/In/57.pl index 2bf56517d1..2bf56517d1 100644 --- a/lib/unicode/In/CJKRadicalsSupplement.pl +++ b/lib/unicode/In/57.pl diff --git a/lib/unicode/In/KangxiRadicals.pl b/lib/unicode/In/58.pl index 3903f15c4c..3903f15c4c 100644 --- a/lib/unicode/In/KangxiRadicals.pl +++ b/lib/unicode/In/58.pl diff --git a/lib/unicode/In/IdeographicDescriptionCharacters.pl b/lib/unicode/In/59.pl index 07799e6941..07799e6941 100644 --- a/lib/unicode/In/IdeographicDescriptionCharacters.pl +++ b/lib/unicode/In/59.pl diff --git a/lib/unicode/In/CombiningDiacriticalMarks.pl 
b/lib/unicode/In/6.pl index cf9bb94991..cf9bb94991 100644 --- a/lib/unicode/In/CombiningDiacriticalMarks.pl +++ b/lib/unicode/In/6.pl diff --git a/lib/unicode/In/CJKSymbolsandPunctuation.pl b/lib/unicode/In/60.pl index 0c66f051a4..0c66f051a4 100644 --- a/lib/unicode/In/CJKSymbolsandPunctuation.pl +++ b/lib/unicode/In/60.pl diff --git a/lib/unicode/In/Hiragana.pl b/lib/unicode/In/61.pl index 49b4e4976a..49b4e4976a 100644 --- a/lib/unicode/In/Hiragana.pl +++ b/lib/unicode/In/61.pl diff --git a/lib/unicode/In/Katakana.pl b/lib/unicode/In/62.pl index e5568a283a..e5568a283a 100644 --- a/lib/unicode/In/Katakana.pl +++ b/lib/unicode/In/62.pl diff --git a/lib/unicode/In/Bopomofo.pl b/lib/unicode/In/63.pl index 4f9b5f46b2..4f9b5f46b2 100644 --- a/lib/unicode/In/Bopomofo.pl +++ b/lib/unicode/In/63.pl diff --git a/lib/unicode/In/HangulCompatibilityJamo.pl b/lib/unicode/In/64.pl index b15c4cc760..b15c4cc760 100644 --- a/lib/unicode/In/HangulCompatibilityJamo.pl +++ b/lib/unicode/In/64.pl diff --git a/lib/unicode/In/Kanbun.pl b/lib/unicode/In/65.pl index d78c2088c0..d78c2088c0 100644 --- a/lib/unicode/In/Kanbun.pl +++ b/lib/unicode/In/65.pl diff --git a/lib/unicode/In/BopomofoExtended.pl b/lib/unicode/In/66.pl index 96150b4f3f..96150b4f3f 100644 --- a/lib/unicode/In/BopomofoExtended.pl +++ b/lib/unicode/In/66.pl diff --git a/lib/unicode/In/EnclosedCJKLettersandMonths.pl b/lib/unicode/In/67.pl index 2708fec7e3..2708fec7e3 100644 --- a/lib/unicode/In/EnclosedCJKLettersandMonths.pl +++ b/lib/unicode/In/67.pl diff --git a/lib/unicode/In/CJKCompatibility.pl b/lib/unicode/In/68.pl index d504529398..d504529398 100644 --- a/lib/unicode/In/CJKCompatibility.pl +++ b/lib/unicode/In/68.pl diff --git a/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl b/lib/unicode/In/69.pl index 83adb815d7..83adb815d7 100644 --- a/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl +++ b/lib/unicode/In/69.pl diff --git a/lib/unicode/In/Greek.pl b/lib/unicode/In/7.pl index 8d89b7176b..8d89b7176b 100644 --- 
a/lib/unicode/In/Greek.pl +++ b/lib/unicode/In/7.pl diff --git a/lib/unicode/In/CJKUnifiedIdeographs.pl b/lib/unicode/In/70.pl index f74552e661..f74552e661 100644 --- a/lib/unicode/In/CJKUnifiedIdeographs.pl +++ b/lib/unicode/In/70.pl diff --git a/lib/unicode/In/YiSyllables.pl b/lib/unicode/In/71.pl index 0636a82e56..0636a82e56 100644 --- a/lib/unicode/In/YiSyllables.pl +++ b/lib/unicode/In/71.pl diff --git a/lib/unicode/In/YiRadicals.pl b/lib/unicode/In/72.pl index 56404c5fb5..56404c5fb5 100644 --- a/lib/unicode/In/YiRadicals.pl +++ b/lib/unicode/In/72.pl diff --git a/lib/unicode/In/HangulSyllables.pl b/lib/unicode/In/73.pl index e1e26945e5..e1e26945e5 100644 --- a/lib/unicode/In/HangulSyllables.pl +++ b/lib/unicode/In/73.pl diff --git a/lib/unicode/In/HighSurrogates.pl b/lib/unicode/In/74.pl index 0f4eb5727b..0f4eb5727b 100644 --- a/lib/unicode/In/HighSurrogates.pl +++ b/lib/unicode/In/74.pl diff --git a/lib/unicode/In/HighPrivateUseSurrogates.pl b/lib/unicode/In/75.pl index ec4ca07885..ec4ca07885 100644 --- a/lib/unicode/In/HighPrivateUseSurrogates.pl +++ b/lib/unicode/In/75.pl diff --git a/lib/unicode/In/LowSurrogates.pl b/lib/unicode/In/76.pl index d056168c66..d056168c66 100644 --- a/lib/unicode/In/LowSurrogates.pl +++ b/lib/unicode/In/76.pl diff --git a/lib/unicode/In/77.pl b/lib/unicode/In/77.pl new file mode 100644 index 0000000000..530166da95 --- /dev/null +++ b/lib/unicode/In/77.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! 
+return <<'END'; +100000 10FFFD +END diff --git a/lib/unicode/In/CJKCompatibilityIdeographs.pl b/lib/unicode/In/78.pl index 0c553d6ee2..0c553d6ee2 100644 --- a/lib/unicode/In/CJKCompatibilityIdeographs.pl +++ b/lib/unicode/In/78.pl diff --git a/lib/unicode/In/AlphabeticPresentationForms.pl b/lib/unicode/In/79.pl index 42cc1ca029..42cc1ca029 100644 --- a/lib/unicode/In/AlphabeticPresentationForms.pl +++ b/lib/unicode/In/79.pl diff --git a/lib/unicode/In/Cyrillic.pl b/lib/unicode/In/8.pl index 0075ce1ddf..0075ce1ddf 100644 --- a/lib/unicode/In/Cyrillic.pl +++ b/lib/unicode/In/8.pl diff --git a/lib/unicode/In/ArabicPresentationForms-A.pl b/lib/unicode/In/80.pl index ffb4f1eb3a..ffb4f1eb3a 100644 --- a/lib/unicode/In/ArabicPresentationForms-A.pl +++ b/lib/unicode/In/80.pl diff --git a/lib/unicode/In/CombiningHalfMarks.pl b/lib/unicode/In/81.pl index cc8a4a21b1..cc8a4a21b1 100644 --- a/lib/unicode/In/CombiningHalfMarks.pl +++ b/lib/unicode/In/81.pl diff --git a/lib/unicode/In/CJKCompatibilityForms.pl b/lib/unicode/In/82.pl index 4e462b8402..4e462b8402 100644 --- a/lib/unicode/In/CJKCompatibilityForms.pl +++ b/lib/unicode/In/82.pl diff --git a/lib/unicode/In/SmallFormVariants.pl b/lib/unicode/In/83.pl index 4eff1ea01e..4eff1ea01e 100644 --- a/lib/unicode/In/SmallFormVariants.pl +++ b/lib/unicode/In/83.pl diff --git a/lib/unicode/In/ArabicPresentationForms-B.pl b/lib/unicode/In/84.pl index dc5a32e4b1..dc5a32e4b1 100644 --- a/lib/unicode/In/ArabicPresentationForms-B.pl +++ b/lib/unicode/In/84.pl diff --git a/lib/unicode/In/Specials.pl b/lib/unicode/In/85.pl index 931fc5b902..931fc5b902 100644 --- a/lib/unicode/In/Specials.pl +++ b/lib/unicode/In/85.pl diff --git a/lib/unicode/In/HalfwidthandFullwidthForms.pl b/lib/unicode/In/86.pl index 03e85154fb..03e85154fb 100644 --- a/lib/unicode/In/HalfwidthandFullwidthForms.pl +++ b/lib/unicode/In/86.pl diff --git a/lib/unicode/Block.pl b/lib/unicode/In/87.pl index 272f63fc9f..44a5e47510 100644 --- a/lib/unicode/Block.pl +++ 
b/lib/unicode/In/87.pl @@ -2,4 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; +10300 1032F END diff --git a/lib/unicode/In/88.pl b/lib/unicode/In/88.pl new file mode 100644 index 0000000000..803041101c --- /dev/null +++ b/lib/unicode/In/88.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +10330 1034F +END diff --git a/lib/unicode/In/89.pl b/lib/unicode/In/89.pl new file mode 100644 index 0000000000..d2c50bbcad --- /dev/null +++ b/lib/unicode/In/89.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +10400 1044F +END diff --git a/lib/unicode/In/Armenian.pl b/lib/unicode/In/9.pl index a6d50e3be5..a6d50e3be5 100644 --- a/lib/unicode/In/Armenian.pl +++ b/lib/unicode/In/9.pl diff --git a/lib/unicode/In/90.pl b/lib/unicode/In/90.pl new file mode 100644 index 0000000000..f1073c7392 --- /dev/null +++ b/lib/unicode/In/90.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +1D000 1D0FF +END diff --git a/lib/unicode/In/91.pl b/lib/unicode/In/91.pl new file mode 100644 index 0000000000..7435889d7c --- /dev/null +++ b/lib/unicode/In/91.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +1D100 1D1FF +END diff --git a/lib/unicode/In/92.pl b/lib/unicode/In/92.pl new file mode 100644 index 0000000000..7e40edc3ed --- /dev/null +++ b/lib/unicode/In/92.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! 
+return <<'END'; +1D400 1D7FF +END diff --git a/lib/unicode/In/93.pl b/lib/unicode/In/93.pl new file mode 100644 index 0000000000..931aec3891 --- /dev/null +++ b/lib/unicode/In/93.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +20000 2A6D6 +END diff --git a/lib/unicode/In/94.pl b/lib/unicode/In/94.pl new file mode 100644 index 0000000000..c025148c04 --- /dev/null +++ b/lib/unicode/In/94.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +2F800 2FA1F +END diff --git a/lib/unicode/In/95.pl b/lib/unicode/In/95.pl new file mode 100644 index 0000000000..495d2d581d --- /dev/null +++ b/lib/unicode/In/95.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +E0000 E007F +END diff --git a/lib/unicode/In/PrivateUse.pl b/lib/unicode/In/PrivateUse.pl deleted file mode 100644 index c81b567a74..0000000000 --- a/lib/unicode/In/PrivateUse.pl +++ /dev/null @@ -1,6 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.txt. -# Any changes made here will be lost! -return <<'END'; -E000 F8FF -END diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL index 818785452b..68578b974f 100755 --- a/lib/unicode/mktables.PL +++ b/lib/unicode/mktables.PL @@ -231,11 +231,24 @@ mkdir "To", 0755; # This is not written for speed... 
+my %InId; +my $InId = 0; + foreach $file (@todo) { my ($table, $wanted, $val) = @$file; next if @ARGV and not grep { $_ eq $table } @ARGV; - print $table,"\n"; - if ($table =~ /^(Is|In|To)(.*)/) { + print $table, "\n"; + $table =~ s/\W+//g; + if ($table =~ /^In(.+)/) { + my $id; + unless (exists $InId{$1}) { + $InId{$1} = $InId++; + } + $id = $InId{$1}; + open(OUT, ">In/$id.pl") or die "Can't create In/$id.pl: $!\n"; + print OUT "# In/$id.pl $1\n"; + } + elsif ($table =~ /^(Is|To)(.+)/) { open(OUT, ">$1/$2.pl") or die "Can't create $1/$2.pl: $!\n"; } else { @@ -257,9 +270,9 @@ END # Must treat blocks specially. exit if @ARGV and not grep { $_ eq Block } @ARGV; -print "Block\n"; +print "Blocks\n"; open(UD, 'Blocks.txt') or die "Can't open Blocks.txt: $!\n"; -open(OUT, ">Block.pl") or die "Can't create Block.pl: $!\n"; +open(OUT, ">Blocks.pl") or die "Can't create Blocks.pl: $!\n"; print OUT <<EOH; # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! # This file is built by $0 from e.g. $UnicodeData. @@ -273,11 +286,17 @@ while (<UD>) { next if /^#/; next if /^$/; chomp; - ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]); (.+)/i; + ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+)/i; if ($name) { print OUT "$code $last $name\n"; - $name =~ s/\s+//g; - open(BLOCK, ">In/$name.pl"); + $name =~ s/\W+//g; + my $id; + unless (exists $InId{$name}) { + $InId{$name} = $InId++; + } + $id = $InId{$name}; + open(BLOCK, ">In/$id.pl") or die "Can't create In/$id.pl: $!\n"; + print OUT "# In/$id.pl $name\n"; print BLOCK <<EOH; # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! # This file is built by $0 from e.g. $UnicodeData. @@ -295,6 +314,24 @@ END2 print OUT "END\n"; close OUT; +open(INID, ">In.pl") or die "Can't create In.pl: $!\n"; + +print INID <<EOH; +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by $0 from e.g. $UnicodeData. +# Any changes made here will be lost! +%utf8::In = ( +EOH + +# Order doesn't matter but let's prettyprint anyway. 
+foreach my $in (sort { $InId{$a} <=> $InId{$b} } keys %InId) { + print INID "'$in' => $InId{$in},\n"; +} + +print INID ");\n"; + +close(INID); + ################################################## sub proplist { diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index 8649e9e07e..5637d12afa 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -26,7 +26,14 @@ sub SWASHNEW { while (($caller = caller($i)) eq __PACKAGE__) { $i++ } my $encoding = $enc{$caller} || "unicode"; (my $file = $type) =~ s!::!/!g; - $file =~ s#^(I[sn]|To)([A-Z].*)#$1/$2#; + if ($file =~ /^In(.+)/) { + defined %utf8::In || do "$encoding/In.pl"; + if (exists $utf8::In{$1}) { + $file = "$encoding/In/$utf8::In{$1}"; + } + } else { + $file =~ s#^(Is|To)([A-Z].*)#$1/$2#; + } $list ||= eval { $caller->$type(); } || do "$file.pl" || do "$encoding/$file.pl" diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod index 8ddcdd2b06..12bee5c7a3 100644 --- a/pod/perlunicode.pod +++ b/pod/perlunicode.pod @@ -158,9 +158,12 @@ Named Unicode properties and block ranges may be used as character classes via the new C<\p{}> (matches property) and C<\P{}> (doesn't match property) constructs. For instance, C<\p{Lu}> matches any character with the Unicode uppercase property, while C<\p{M}> matches -any mark character. Single letter properties may omit the brackets, so -that can be written C<\pM> also. Many predefined character classes are -available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>. +any mark character. Single letter properties may omit the brackets, +so that can be written C<\pM> also. Many predefined character classes +are available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>. The +names of the C<In> classes are the official Unicode block names but +with all non-alphanumeric characters removed, for example the block +name C<"Latin-1 Supplement"> becomes C<\p{InLatin1Supplement}>. =item * |